pre-migration commit

This commit is contained in:
2026-04-26 15:08:35 -07:00
parent c724cb36b1
commit 39ee857559
32 changed files with 335 additions and 106 deletions
-2146
View File
File diff suppressed because it is too large Load Diff
-2457
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-373
View File
@@ -1,373 +0,0 @@
from __future__ import annotations
import sys
from urllib.parse import urlparse
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
from tool.playwright import PlaywrightTool
class Bandcamp(Provider):
"""Search provider for Bandcamp."""
TABLE_AUTO_STAGES = {
"bandcamp": ["download-file"],
}
AUTO_STAGE_USE_SELECTION_ARGS = True
@staticmethod
def _download_selection_args(target_url: str, media_type: str) -> Optional[List[str]]:
target = str(target_url or "").strip()
kind = str(media_type or "").strip().lower()
if not target or kind == "artist":
return None
return ["-url", target]
@staticmethod
def _base_url(raw_url: str) -> str:
"""Normalize a Bandcamp URL down to scheme://netloc."""
text = str(raw_url or "").strip()
if not text:
return ""
try:
parsed = urlparse(text)
if not parsed.scheme or not parsed.netloc:
return text
return f"{parsed.scheme}://{parsed.netloc}"
except Exception:
return text
@classmethod
def _discography_url(cls, raw_url: str) -> str:
base = cls._base_url(raw_url)
if not base:
return ""
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
def _scrape_artist_page(self,
page: Any,
artist_url: str,
limit: int = 50) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
if not base or not discography_url:
return []
debug(f"[bandcamp] Scraping artist page: {discography_url}")
page.goto(discography_url)
page.wait_for_load_state("domcontentloaded")
results: List[SearchResult] = []
cards = page.query_selector_all("li.music-grid-item") or []
if not cards:
# Fallback selector
cards = page.query_selector_all(".music-grid-item") or []
for item in cards[:limit]:
try:
link = item.query_selector("a")
if not link:
continue
href = link.get_attribute("href") or ""
href = str(href).strip()
if not href:
continue
if href.startswith("/"):
target = base.rstrip("/") + href
elif href.startswith("http://") or href.startswith("https://"):
target = href
else:
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title"
) or item.query_selector(".title")
title = title_node.inner_text().strip() if title_node else ""
if title:
title = " ".join(title.split())
if not title:
title = target.rsplit("/", 1)[-1]
kind = (
"album" if "/album/" in target else
("track" if "/track/" in target else "item")
)
selection_args = self._download_selection_args(target, kind)
selection_action = (["download-file"] + selection_args) if selection_args else None
results.append(
SearchResult(
table="bandcamp",
title=title,
path=target,
detail="",
annotations=[kind],
media_kind="audio",
columns=[
("Title",
title),
("Type",
kind),
("Url",
target),
],
full_metadata={
"type": kind,
"url": target,
"artist_url": base,
},
selection_args=selection_args,
selection_action=selection_action,
)
)
except Exception as exc:
debug(f"[bandcamp] Error parsing artist item: {exc}")
return results
def selector(
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any
) -> bool:
"""Handle Bandcamp `@N` selection.
If the selected item is an ARTIST result, selecting it auto-expands into
a discography table by scraping the artist URL.
"""
if not stage_is_last:
return False
# Playwright is required; proceed to handle artist selection
# Only handle artist selections.
chosen: List[Dict[str, Any]] = []
for item in selected_items or []:
payload: Dict[str,
Any] = {}
if isinstance(item, dict):
payload = item
else:
try:
if hasattr(item, "to_dict"):
payload = item.to_dict() # type: ignore[assignment]
except Exception:
payload = {}
if not payload:
try:
payload = {
"title": getattr(item,
"title",
None),
"url": getattr(item,
"url",
None),
"path": getattr(item,
"path",
None),
"metadata": getattr(item,
"metadata",
None),
"extra": getattr(item,
"extra",
None),
}
except Exception:
payload = {}
meta = payload.get("metadata") or payload.get("full_metadata") or {}
if not isinstance(meta, dict):
meta = {}
extra = payload.get("extra")
if isinstance(extra, dict):
meta = {
**meta,
**extra
}
type_val = str(meta.get("type") or "").strip().lower()
if type_val != "artist":
continue
title = str(payload.get("title") or "").strip()
url_val = str(
payload.get("url") or payload.get("path") or meta.get("url") or ""
).strip()
base = self._base_url(url_val)
if not base:
continue
chosen.append(
{
"title": title,
"url": base,
"location": str(meta.get("artist") or "").strip()
}
)
if not chosen:
return False
# Build a new table from artist discography.
try:
from SYS.result_table import Table
from SYS.rich_display import stdout_console
except Exception:
return False
artist_title = chosen[0].get("title") or "artist"
artist_url = chosen[0].get("url") or ""
try:
tool = PlaywrightTool({})
tool.require()
with tool.open_page(headless=True) as page:
discography = self._scrape_artist_page(page, artist_url, limit=50)
except Exception as exc:
print(f"bandcamp artist lookup failed: {exc}\n")
return True
table = Table(f"Bandcamp: artist:{artist_title}")._perseverance(True)
table.set_table("bandcamp")
try:
table.set_value_case("preserve")
except Exception:
pass
results_payload: List[Dict[str, Any]] = []
for r in discography:
table.add_result(r)
try:
results_payload.append(r.to_dict())
except Exception:
results_payload.append(
{
"table": "bandcamp",
"title": getattr(r,
"title",
""),
"path": getattr(r,
"path",
""),
}
)
try:
ctx.set_last_result_table(table, results_payload)
ctx.set_current_stage_table(table)
except Exception:
pass
try:
stdout_console().print()
stdout_console().print(table)
except Exception:
pass
return True
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
try:
tool = PlaywrightTool({})
tool.require()
with tool.open_page(headless=True) as page:
if query.strip().lower().startswith("artist:"):
artist_name = query[7:].strip().strip('"')
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
else:
search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
results = self._scrape_url(page, search_url, limit)
return results
except Exception as exc:
log(f"[bandcamp] Search error: {exc}", file=sys.stderr)
return []
def _scrape_url(self, page: Any, url: str, limit: int) -> List[SearchResult]:
debug(f"[bandcamp] Scraping: {url}")
page.goto(url)
page.wait_for_load_state("domcontentloaded")
results: List[SearchResult] = []
search_results = page.query_selector_all(".searchresult")
if not search_results:
return results
for item in search_results[:limit]:
try:
heading = item.query_selector(".heading")
if not heading:
continue
link = heading.query_selector("a")
if not link:
continue
title = link.inner_text().strip()
target_url = link.get_attribute("href")
base_url = self._base_url(str(target_url or ""))
subhead = item.query_selector(".subhead")
artist = subhead.inner_text().strip() if subhead else "Unknown"
itemtype = item.query_selector(".itemtype")
media_type = itemtype.inner_text().strip() if itemtype else "album"
selection_args = self._download_selection_args(str(target_url or ""), media_type)
selection_action = (["download-file"] + selection_args) if selection_args else None
results.append(
SearchResult(
table="bandcamp",
title=title,
path=target_url,
detail=f"By: {artist}",
annotations=[media_type],
media_kind="audio",
columns=[
("Title",
title),
("Location",
artist),
("Type",
media_type),
("Url",
str(target_url or "")),
],
full_metadata={
"artist": artist,
"type": media_type,
"url": str(target_url or ""),
"artist_url": base_url,
},
selection_args=selection_args,
selection_action=selection_action,
)
)
except Exception as exc:
debug(f"[bandcamp] Error parsing result: {exc}")
return results
def validate(self) -> bool:
# Playwright is required for the provider to function
return True
-231
View File
@@ -1,231 +0,0 @@
from __future__ import annotations
import os
import sys
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider
from SYS.logger import log
def _pick_provider_config(config: Any) -> Dict[str, Any]:
if not isinstance(config, dict):
return {}
provider = config.get("provider")
if not isinstance(provider, dict):
return {}
entry = provider.get("file.io")
if isinstance(entry, dict):
return entry
return {}
def _extract_link(payload: Any) -> Optional[str]:
if isinstance(payload, dict):
for key in ("link", "url", "downloadLink", "download_url"):
val = payload.get(key)
if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_link(nested)
if found:
return found
return None
def _extract_key(payload: Any) -> Optional[str]:
if isinstance(payload, dict):
for key in ("key", "id", "uuid"):
val = payload.get(key)
if isinstance(val, str) and val.strip():
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_key(nested)
if found:
return found
return None
class FileIO(Provider):
"""File provider for file.io."""
PLUGIN_NAME = "file.io"
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
return [
{
"key": "api_key",
"label": "API Key",
"default": "",
"secret": True
},
{
"key": "expires",
"label": "Default Expiration (e.g. 1w)",
"default": "1w"
},
{
"key": "maxDownloads",
"label": "Max Downloads",
"default": 1
},
{
"key": "autoDelete",
"label": "Auto Delete",
"default": True
}
]
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
conf = _pick_provider_config(self.config)
self._base_url = str(conf.get("base_url")
or "https://file.io").strip().rstrip("/")
self._api_key = conf.get("api_key")
self._default_expires = conf.get("expires")
self._default_max_downloads = conf.get("maxDownloads")
if self._default_max_downloads is None:
self._default_max_downloads = conf.get("max_downloads")
self._default_auto_delete = conf.get("autoDelete")
if self._default_auto_delete is None:
self._default_auto_delete = conf.get("auto_delete")
def validate(self) -> bool:
return True
def upload(self, file_path: str, **kwargs: Any) -> str:
from API.HTTP import HTTPClient
from SYS.models import ProgressFileReader
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
data: Dict[str,
Any] = {}
expires = kwargs.get("expires", self._default_expires)
max_downloads = kwargs.get(
"maxDownloads",
kwargs.get("max_downloads",
self._default_max_downloads)
)
auto_delete = kwargs.get(
"autoDelete",
kwargs.get("auto_delete",
self._default_auto_delete)
)
if expires not in (None, ""):
data["expires"] = expires
if max_downloads not in (None, ""):
data["maxDownloads"] = max_downloads
if auto_delete not in (None, ""):
data["autoDelete"] = auto_delete
headers: Dict[str,
str] = {
"User-Agent": "Medeia-Macina/1.0",
"Accept": "application/json"
}
if isinstance(self._api_key, str) and self._api_key.strip():
# Some file.io plans use bearer tokens; keep optional.
headers["Authorization"] = f"Bearer {self._api_key.strip()}"
try:
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
filename = os.path.basename(file_path)
try:
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(
handle,
total_bytes=total,
label="upload"
)
response = client.request(
"POST",
f"{self._base_url}/upload",
data=data or None,
files={
"file": (filename,
wrapped)
},
follow_redirects=True,
raise_for_status=False,
)
if response.status_code >= 400:
location = response.headers.get("location"
) or response.headers.get("Location")
ct = response.headers.get("content-type"
) or response.headers.get("Content-Type")
raise Exception(
f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
)
payload: Any
try:
payload = response.json()
except Exception:
payload = None
# If the server ignored our Accept header and returned HTML, this is almost
# certainly the wrong endpoint or an upstream block.
ct = (
response.headers.get("content-type")
or response.headers.get("Content-Type") or ""
).lower()
if (payload is None) and ("text/html" in ct):
raise Exception(
"file.io returned HTML instead of JSON; expected API response from /upload"
)
if isinstance(payload, dict) and payload.get("success") is False:
reason = payload.get("message"
) or payload.get("error") or payload.get("status")
raise Exception(str(reason or "Upload failed"))
uploaded_url = _extract_link(payload)
if not uploaded_url:
# Some APIs may return the link as plain text.
text = str(response.text or "").strip()
if text.startswith(("http://", "https://")):
uploaded_url = text
if not uploaded_url:
key = _extract_key(payload)
if key:
uploaded_url = f"{self._base_url}/{key.lstrip('/')}"
if not uploaded_url:
try:
snippet = (response.text or "").strip()
if len(snippet) > 300:
snippet = snippet[:300] + "..."
except Exception:
snippet = "<unreadable response>"
raise Exception(
f"Upload succeeded but response did not include a link (response: {snippet})"
)
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(pipe_obj,
uploaded_url)
except Exception:
pass
return uploaded_url
except Exception as exc:
log(f"[file.io] Upload error: {exc}", file=sys.stderr)
raise
-193
View File
@@ -1,193 +0,0 @@
"""Example plugin template for use as a starter kit.
This minimal plugin demonstrates the typical hooks a plugin may implement:
- `validate()` to assert it's usable
- `search()` to return `SearchResult` items
- `download()` to persist a sample file (useful for local tests)
See `docs/provider_guide.md` for authoring guidance.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
class HelloProvider(Provider):
"""Very small example plugin suitable as a template.
- Table name: `hello`
- Usage: `search-file -plugin hello "query"`
- Selecting a row and piping into `download-file` will call `download()`.
"""
PLUGIN_NAME = "hello"
URL = ("hello:",)
URL_DOMAINS = ()
def validate(self) -> bool:
# No configuration required; always available for testing/demo purposes.
return True
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
q = (query or "").strip()
results: List[SearchResult] = []
if not q or q in {"*", "all", "list"}:
q = "example"
# Emit up to `limit` tiny example results.
n = min(max(1, int(limit)), 3)
for i in range(1, n + 1):
title = f"{q} sample {i}"
path = f"https://example.org/{q}/{i}"
sr = SearchResult(
table="hello",
title=title,
path=path,
detail="Example provider result",
media_kind="file",
columns=[("Example", "yes")],
full_metadata={"example_index": i},
)
results.append(sr)
return results[: max(0, int(limit))]
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
"""Create a small text file to simulate a download.
This keeps the example self-contained (no network access required) and
makes it straightforward to test provider behavior with `pytest`.
"""
try:
Path(output_dir).mkdir(parents=True, exist_ok=True)
except Exception:
pass
title = str(getattr(result, "title", "hello") or "hello").strip()
safe = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in title)
fname = f"{safe}.txt" if safe else "hello.txt"
dest = Path(output_dir) / fname
try:
dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
return dest
except Exception:
return None
def selector(
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any,
) -> bool:
"""Present a simple details table when a HelloProvider row is selected.
This demonstrates how providers can implement custom `@N` selection
behavior by constructing a `ResultTable`, populating it with
provider-specific rows, and instructing the CLI to show the table.
"""
if not stage_is_last:
return False
def _as_payload(item: Any) -> Dict[str, Any]:
if isinstance(item, dict):
return dict(item)
try:
if hasattr(item, "to_dict"):
maybe = item.to_dict()
if isinstance(maybe, dict):
return maybe
except Exception:
pass
payload: Dict[str, Any] = {}
try:
payload = {
"title": getattr(item, "title", None),
"path": getattr(item, "path", None),
"table": getattr(item, "table", None),
"annotations": getattr(item, "annotations", None),
"media_kind": getattr(item, "media_kind", None),
"full_metadata": getattr(item, "full_metadata", None),
}
except Exception:
payload = {}
return payload
chosen: List[Dict[str, Any]] = []
for item in selected_items or []:
payload = _as_payload(item)
meta = payload.get("full_metadata") or {}
if not isinstance(meta, dict):
meta = {}
idx = meta.get("example_index")
if idx is None:
continue
title = str(payload.get("title") or payload.get("path") or "").strip() or f"hello-{idx}"
chosen.append({"index": idx, "title": title, "path": payload.get("path")})
if not chosen:
return False
target = chosen[0]
idx = target.get("index")
title = target.get("title") or f"hello-{idx}"
try:
from SYS.result_table import Table
from SYS.rich_display import stdout_console
except Exception:
# If ResultTable isn't available, consider selection handled
return True
table = Table(f"Hello Details: {title}")._perseverance(True)
table.set_table("hello")
try:
table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
except Exception:
pass
table.set_source_command("download-file", [])
results_payload: List[Dict[str, Any]] = []
for part in ("a", "b"):
file_title = f"{title} - part {part}"
file_path = f"{target.get('path')}/{part}"
sr = SearchResult(
table="hello",
title=file_title,
path=file_path,
detail=f"Part {part}",
media_kind="file",
columns=[("Part", part)],
full_metadata={"part": part, "example_index": idx},
)
table.add_result(sr)
try:
results_payload.append(sr.to_dict())
except Exception:
results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})
try:
ctx.set_last_result_table(table, results_payload)
ctx.set_current_stage_table(table)
except Exception:
pass
try:
stdout_console().print()
stdout_console().print(table)
except Exception:
pass
return True
File diff suppressed because it is too large Load Diff
-1972
View File
File diff suppressed because it is too large Load Diff
-147
View File
@@ -1,147 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from API.loc import LOCClient
from ProviderCore.base import Provider, SearchResult
from SYS.cli_syntax import get_free_text, parse_query
from SYS.logger import log
class LOC(Provider):
"""LoC search provider.
Currently implements Chronicling America collection search via the LoC JSON API.
"""
@property
def preserve_order(self) -> bool:
return True
URL_DOMAINS = ["www.loc.gov"]
URL = URL_DOMAINS
def validate(self) -> bool:
return True
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
_ = kwargs
parsed = parse_query(query or "")
text = get_free_text(parsed).strip()
fields = parsed.get("fields",
{}) if isinstance(parsed,
dict) else {}
# Allow explicit q: override.
q = str(fields.get("q") or text or "").strip()
if not q:
return []
# Pass through any extra filters supported by the LoC API.
extra: Dict[str,
Any] = {}
if isinstance(filters, dict):
extra.update(filters)
if isinstance(fields, dict):
for k, v in fields.items():
if k == "q":
continue
extra[str(k)] = v
client = LOCClient()
results: List[SearchResult] = []
start = 1
page_size = 25
try:
if limit and limit > 0:
page_size = max(1, min(int(limit), 50))
while len(results) < max(0, int(limit)):
payload = client.search_chronicling_america(
q,
start=start,
count=page_size,
extra_params=extra
)
items = payload.get("results")
if not isinstance(items, list) or not items:
break
for it in items:
if not isinstance(it, dict):
continue
title = str(it.get("title") or "").strip() or "(untitled)"
date = str(it.get("date") or "").strip()
url = str(it.get("url") or "").strip()
aka = it.get("aka")
if (not url) and isinstance(aka, list) and aka:
url = str(aka[0] or "").strip()
formats = it.get("online_format")
if isinstance(formats, list):
fmt_text = ", ".join([str(x) for x in formats if x])
else:
fmt_text = str(formats or "").strip()
partof = it.get("partof")
if isinstance(partof, list) and partof:
source = str(partof[-1] or "").strip()
else:
source = "Chronicling America"
detail_parts = []
if date:
detail_parts.append(date)
if source:
detail_parts.append(source)
detail = "".join(detail_parts)
annotations: List[str] = []
if date:
annotations.append(date)
if fmt_text:
annotations.append(fmt_text)
results.append(
SearchResult(
table="loc",
title=title,
path=url or title,
detail=detail,
annotations=annotations,
media_kind="document",
columns=[
("Title",
title),
("Date",
date),
("Format",
fmt_text),
("URL",
url),
],
full_metadata=it,
)
)
if len(results) >= int(limit):
break
# LoC API pagination uses sp (1-based start index).
if len(items) < page_size:
break
start += len(items)
except Exception as exc:
log(f"[loc] search failed: {exc}")
return []
return results
-877
View File
@@ -1,877 +0,0 @@
from __future__ import annotations
import mimetypes
import sys
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from urllib.parse import quote
from API.requests_client import get_requests_session
from SYS.utils import ffprobe as probe_media_metadata
from ProviderCore.base import Provider, SearchResult
from SYS.provider_helpers import TableProviderMixin
from SYS.logger import log
_MATRIX_INIT_CHECK_CACHE: Dict[str,
Tuple[bool,
Optional[str]]] = {}
def _sniff_mime_from_header(path: Path) -> Optional[str]:
"""Best-effort MIME sniffing from file headers.
Used when the file has no/unknown extension (common for exported/temp files).
Keeps dependencies to stdlib only.
"""
try:
if not path.exists() or not path.is_file():
return None
with open(path, "rb") as handle:
header = handle.read(512)
if not header:
return None
# Images
if header.startswith(b"\xff\xd8\xff"):
return "image/jpeg"
if header.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
return "image/gif"
if header.startswith(b"BM"):
return "image/bmp"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
return "image/webp"
# Audio
if header.startswith(b"fLaC"):
return "audio/flac"
if header.startswith(b"OggS"):
# Could be audio or video; treat as audio unless extension suggests video.
return "audio/ogg"
if header.startswith(b"ID3"):
return "audio/mpeg"
if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
return "audio/mpeg"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
return "audio/wav"
# Video
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
return "video/x-msvideo"
if header.startswith(b"\x1a\x45\xdf\xa3"):
# EBML container: Matroska/WebM.
return "video/x-matroska"
if len(header) >= 12 and header[4:8] == b"ftyp":
# ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
return "video/mp4"
# MPEG-TS / M2TS (sync byte every 188 bytes)
try:
if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
with open(path, "rb") as handle:
handle.seek(188)
b = handle.read(1)
if b == b"\x47":
return "video/mp2t"
except Exception:
pass
return None
except Exception:
return None
def _classify_matrix_upload(path: Path,
*,
explicit_mime_type: Optional[str] = None) -> Tuple[str,
str]:
"""Return (mime_type, msgtype) for Matrix uploads."""
mime_type = str(explicit_mime_type or "").strip() or None
if not mime_type:
# `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
mime_type, _ = mimetypes.guess_type(str(path))
if not mime_type:
mime_type = _sniff_mime_from_header(path)
# Refinements based on extension for ambiguous containers.
ext = path.suffix.lower()
if ext in {".m4a",
".aac"}:
mime_type = mime_type or "audio/mp4"
if ext in {".mkv",
".webm"}:
mime_type = mime_type or "video/x-matroska"
if ext in {".ogv"}:
mime_type = mime_type or "video/ogg"
msgtype = "m.file"
if mime_type:
mt = mime_type.casefold()
if mt.startswith("image/"):
msgtype = "m.image"
elif mt.startswith("audio/"):
msgtype = "m.audio"
elif mt.startswith("video/"):
msgtype = "m.video"
# Final fallback for unknown MIME types.
if msgtype == "m.file":
audio_exts = {
".mp3",
".flac",
".wav",
".m4a",
".aac",
".ogg",
".opus",
".wma",
".mka",
".alac",
}
video_exts = {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
".m2ts",
".mts",
".3gp",
".ogv",
}
image_exts = {".jpg",
".jpeg",
".png",
".gif",
".webp",
".bmp",
".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
elif ext in video_exts:
msgtype = "m.video"
elif ext in image_exts:
msgtype = "m.image"
return (mime_type or "application/octet-stream"), msgtype
def _build_matrix_media_info(path: Path, *, mime_type: str, msgtype: str) -> Dict[str, Any]:
"""Build Matrix `info` payload with dimensions/duration when available.
Matrix clients use `info.w`/`info.h` to size image/video placeholders. Supplying
these fields keeps the preview aspect ratio aligned with the actual media.
"""
info: Dict[str, Any] = {
"mimetype": str(mime_type or "application/octet-stream"),
"size": int(path.stat().st_size),
}
metadata: Dict[str, Any] = {}
try:
metadata = probe_media_metadata(str(path)) or {}
except Exception:
metadata = {}
width: Optional[int] = None
height: Optional[int] = None
try:
raw_w = metadata.get("width") if isinstance(metadata, dict) else None
if isinstance(raw_w, (int, float)) and raw_w > 0:
width = int(raw_w)
except Exception:
width = None
try:
raw_h = metadata.get("height") if isinstance(metadata, dict) else None
if isinstance(raw_h, (int, float)) and raw_h > 0:
height = int(raw_h)
except Exception:
height = None
# Fallback for images when ffprobe metadata is unavailable.
if msgtype == "m.image" and (width is None or height is None):
try:
from PIL import Image # type: ignore
with Image.open(path) as img:
iw, ih = img.size
if isinstance(iw, int) and iw > 0:
width = iw
if isinstance(ih, int) and ih > 0:
height = ih
except Exception:
pass
if msgtype in {"m.image", "m.video"}:
if width is not None:
info["w"] = width
if height is not None:
info["h"] = height
if msgtype in {"m.video", "m.audio"}:
try:
raw_duration = metadata.get("duration") if isinstance(metadata, dict) else None
if isinstance(raw_duration, (int, float)) and raw_duration > 0:
info["duration"] = int(round(float(raw_duration) * 1000.0))
except Exception:
pass
return info
def _normalize_homeserver(value: str) -> str:
text = str(value or "").strip()
if not text:
return ""
if not text.startswith("http"):
text = f"https://{text}"
return text.rstrip("/")
def _matrix_health_check(*,
homeserver: str,
access_token: Optional[str]) -> Tuple[bool,
Optional[str]]:
"""Lightweight Matrix reachability/auth validation.
- Always checks `/versions` (no auth).
- If `access_token` is present, also checks `/whoami`.
"""
try:
base = _normalize_homeserver(homeserver)
if not base:
return False, "Matrix homeserver missing"
resp = get_requests_session().get(f"{base}/_matrix/client/versions", timeout=5)
if resp.status_code != 200:
return False, f"Homeserver returned {resp.status_code}"
if access_token:
headers = {
"Authorization": f"Bearer {access_token}"
}
resp = get_requests_session().get(
f"{base}/_matrix/client/v3/account/whoami",
headers=headers,
timeout=5
)
if resp.status_code != 200:
return False, f"Authentication failed: {resp.status_code}"
return True, None
except Exception as exc:
return False, str(exc)
class Matrix(TableProviderMixin, Provider):
"""Matrix (Element) room provider with file uploads and selection.
This provider uses the new table system (strict ResultTable adapter pattern) for
consistent room listing and selection. It exposes Matrix joined rooms as selectable
rows with metadata enrichment for:
- room_id: Unique Matrix room identifier
- room_name: Human-readable room display name
- _selection_args: For @N expansion control and upload routing
KEY FEATURES:
- Table system: Using ResultTable adapter for strict column/metadata handling
- Room discovery: search() or list_rooms() to enumerate joined rooms
- Selection integration: @N selection triggers upload_to_room() via selector()
- Deferred uploads: Files can be queued for upload to multiple rooms
- MIME detection: Automatic content type classification for Matrix msgtype
SELECTION FLOW:
1. User runs: search-file -plugin matrix "room" (or .matrix -list-rooms)
2. Results show available joined rooms
3. User selects rooms: @1 @2 (or @1,2)
4. Selection triggers upload of pending files to selected rooms
"""
EXPOSE_AS_FILE_PROVIDER = False
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
return [
{
"key": "homeserver",
"label": "Homeserver URL",
"default": "https://matrix.org",
"required": True
},
{
"key": "access_token",
"label": "Access Token",
"default": "",
"secret": True
},
]
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._init_ok: Optional[bool] = None
self._init_reason: Optional[str] = None
matrix_conf = (
self.config.get("provider",
{}).get("matrix",
{}) if isinstance(self.config,
dict) else {}
)
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
# Not configured: keep instance but mark invalid via validate().
# Note: `room_id` is intentionally NOT required, since the CLI can prompt
# the user to select a room dynamically.
if not (homeserver and access_token):
self._init_ok = None
self._init_reason = None
return
cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
if cached is None:
ok, reason = _matrix_health_check(
homeserver=str(homeserver), access_token=str(access_token) if access_token else None
)
_MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
else:
ok, reason = cached
self._init_ok = ok
self._init_reason = reason
if not ok:
raise Exception(reason or "Matrix unavailable")
def validate(self) -> bool:
if not self.config:
return False
if self._init_ok is False:
return False
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
return bool(
matrix_conf.get("homeserver")
and matrix_conf.get("access_token")
)
def status_summary(self) -> Dict[str, Any]:
matrix_conf = self.config.get("provider", {}).get("matrix", {}) if isinstance(self.config, dict) else {}
homeserver = str(matrix_conf.get("homeserver") or "").strip()
room_id = str(matrix_conf.get("room_id") or "").strip()
detail = homeserver
if room_id:
detail = (detail + (" " if detail else "")) + f"room:{room_id}"
enabled = bool(self.validate())
return {
"status": "ENABLED" if enabled else "DISABLED",
"name": self.label,
"plugin": self.name,
"detail": detail or ("Connected" if enabled else "Not configured"),
}
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
"""Search/list joined Matrix rooms.
If query is empty or "*", returns all joined rooms.
Otherwise, filters rooms by name/ID matching the query.
"""
try:
rooms = self.list_rooms()
except Exception as exc:
log(f"[matrix] Failed to list rooms: {exc}", file=sys.stderr)
return []
q = (query or "").strip().lower()
needle = "" if q in {"*", "all", "list"} else q
results: List[SearchResult] = []
for room in rooms:
if len(results) >= limit:
break
room_id = room.get("room_id") or ""
room_name = room.get("name") or ""
# Filter by query if provided
if needle:
match_text = f"{room_name} {room_id}".lower()
if needle not in match_text:
continue
if not room_id:
continue
display_name = room_name or room_id
results.append(
SearchResult(
table="matrix",
title=display_name,
path=f"matrix:room:{room_id}",
detail=room_id if room_name else "",
annotations=["room"],
media_kind="folder",
columns=[
("Room", display_name),
("ID", room_id),
],
full_metadata={
"room_id": room_id,
"room_name": room_name,
"provider": "matrix",
# Selection metadata for table system and @N expansion
"_selection_args": ["-room-id", room_id],
},
)
)
return results
def _get_homeserver_and_token(self) -> Tuple[str, str]:
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
if not homeserver:
raise Exception("Matrix homeserver missing")
if not access_token:
raise Exception("Matrix access_token missing")
base = _normalize_homeserver(str(homeserver))
if not base:
raise Exception("Matrix homeserver missing")
return base, str(access_token)
def list_joined_room_ids(self) -> List[str]:
"""Return joined room IDs for the current user.
Uses `GET /_matrix/client/v3/joined_rooms`.
"""
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}"
}
resp = get_requests_session().get(
f"{base}/_matrix/client/v3/joined_rooms",
headers=headers,
timeout=10
)
if resp.status_code != 200:
raise Exception(f"Matrix joined_rooms failed: {resp.text}")
data = resp.json() or {}
rooms = data.get("joined_rooms") or []
out: List[str] = []
for rid in rooms:
if not isinstance(rid, str) or not rid.strip():
continue
out.append(rid.strip())
return out
def list_rooms(self,
*,
room_ids: Optional[List[str]] = None) -> List[Dict[str,
Any]]:
"""Return joined rooms, optionally limited to a subset.
Performance note: room names require additional per-room HTTP requests.
If `room_ids` is provided, only those rooms will have name lookups.
"""
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}"
}
joined = self.list_joined_room_ids()
if room_ids:
allowed = {str(v).strip().casefold()
for v in room_ids if str(v).strip()}
if allowed:
# Accept either full IDs (!id:hs) or short IDs (!id).
def _is_allowed(rid: str) -> bool:
r = str(rid or "").strip()
if not r:
return False
rc = r.casefold()
if rc in allowed:
return True
short = r.split(":", 1)[0].strip().casefold()
return bool(short) and short in allowed
joined = [rid for rid in joined if _is_allowed(rid)]
out: List[Dict[str, Any]] = []
for room_id in joined:
name = ""
# Best-effort room name lookup (safe to fail).
try:
encoded = quote(room_id, safe="")
name_resp = get_requests_session().get(
f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name",
headers=headers,
timeout=5,
)
if name_resp.status_code == 200:
payload = name_resp.json() or {}
maybe = payload.get("name")
if isinstance(maybe, str):
name = maybe
except Exception:
pass
out.append({
"room_id": room_id,
"name": name
})
return out
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from SYS.models import ProgressFileReader
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
if not room_id:
raise Exception("Matrix room_id missing")
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/octet-stream",
}
mime_type, msgtype = _classify_matrix_upload(
path, explicit_mime_type=kwargs.get("mime_type")
)
headers["Content-Type"] = mime_type
filename = path.name
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(
handle,
total_bytes=int(path.stat().st_size),
label="upload"
)
resp = get_requests_session().post(
upload_url,
headers=headers,
data=wrapped,
params={
"filename": filename
}
)
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
# Build a fragment-free URL suitable for storage backends.
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
download_url_for_store = ""
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://"):]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
media_id = str(media_id).strip()
if server_name and media_id:
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
except Exception:
download_url_for_store = ""
info = _build_matrix_media_info(path, mime_type=mime_type, msgtype=msgtype)
payload = {
"msgtype": msgtype,
"body": filename,
"url": content_uri,
"info": info
}
# Correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {
"Authorization": f"Bearer {token}"
}
send_resp = get_requests_session().put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
event_id = (send_resp.json() or {}).get("event_id")
link = (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id else f"https://matrix.to/#/{room_id}"
)
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
# Prefer the direct media download URL for storage backends.
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(
pipe_obj,
download_url_for_store or link,
)
except Exception:
pass
return link
def send_text_to_room(self, text: str, room_id: str) -> str:
"""Send a plain text message to a specific room."""
message = str(text or "").strip()
if not message:
return ""
if not room_id:
raise Exception("Matrix room_id missing")
base, token = self._get_homeserver_and_token()
encoded_room = quote(str(room_id), safe="")
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {
"Authorization": f"Bearer {token}"
}
payload = {
"msgtype": "m.text",
"body": message
}
send_resp = get_requests_session().put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send text failed: {send_resp.text}")
event_id = (send_resp.json() or {}).get("event_id")
return (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id else f"https://matrix.to/#/{room_id}"
)
def upload(self, file_path: str, **kwargs: Any) -> str:
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
room_id = matrix_conf.get("room_id")
if not room_id:
raise Exception("Matrix room_id missing")
return self.upload_to_room(file_path, str(room_id))
def selector(
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any
) -> bool:
"""Handle Matrix room selection via `@N`.
If the CLI has a pending upload stash, selecting a room triggers an upload.
"""
if not stage_is_last:
return False
pending = None
try:
pending = ctx.load_value("matrix_pending_uploads", default=None)
except Exception:
pending = None
pending_list = list(pending) if isinstance(pending, list) else []
if not pending_list:
return False
room_ids: List[str] = []
for item in selected_items or []:
rid = None
if isinstance(item, dict):
rid = item.get("room_id") or item.get("id")
else:
rid = getattr(item, "room_id", None) or getattr(item, "id", None)
if rid and str(rid).strip():
room_ids.append(str(rid).strip())
if not room_ids:
print("No Matrix room selected\n")
return True
any_failed = False
for room_id in room_ids:
for payload in pending_list:
try:
file_path = ""
delete_after = False
pipe_obj = None
if isinstance(payload, dict):
file_path = str(payload.get("path") or "")
delete_after = bool(payload.get("delete_after", False))
pipe_obj = payload.get("pipe_obj")
else:
file_path = str(getattr(payload, "path", "") or "")
if not file_path:
any_failed = True
continue
media_path = Path(file_path)
if not media_path.exists():
any_failed = True
print(f"Matrix upload file missing: {file_path}")
continue
link = self.upload_to_room(
str(media_path),
str(room_id),
pipe_obj=pipe_obj
)
if link:
print(link)
if delete_after:
try:
media_path.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if media_path.exists():
media_path.unlink()
except Exception:
pass
except Exception as exc:
any_failed = True
print(f"Matrix upload failed: {exc}")
try:
ctx.store_value("matrix_pending_uploads", [])
except Exception:
pass
if any_failed:
print("\nOne or more Matrix uploads failed\n")
return True
# Minimal provider registration for the new table system
try:
from SYS.result_table_adapters import register_plugin
from SYS.result_table_api import ResultModel, ColumnSpec, metadata_column, title_column
def _convert_search_result_to_model(sr: Any) -> ResultModel:
"""Convert Matrix SearchResult to ResultModel for strict table display."""
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
title = d.get("title") or ""
path = d.get("path") or None
columns = d.get("columns") or getattr(sr, "columns", None) or []
# Extract metadata from columns and full_metadata
metadata: Dict[str, Any] = {}
for name, value in columns:
key = str(name or "").strip().lower()
if key in ("room_id", "room_name", "id", "name"):
metadata[key] = value
try:
fm = d.get("full_metadata") or {}
if isinstance(fm, dict):
for k, v in fm.items():
metadata[str(k).strip().lower()] = v
except Exception:
pass
return ResultModel(
title=str(title),
path=str(path) if path else None,
ext=None,
size_bytes=None,
metadata=metadata,
source="matrix"
)
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
"""Adapter to convert SearchResults to ResultModels."""
for it in items:
try:
yield _convert_search_result_to_model(it)
except Exception:
continue
def _has_metadata(rows: List[ResultModel], key: str) -> bool:
"""Check if any row has a given metadata key with a non-empty value."""
for row in rows:
md = row.metadata or {}
if key in md:
val = md[key]
if val is None:
continue
if isinstance(val, str) and not val.strip():
continue
return True
return False
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
"""Build column specifications from available metadata in rows."""
cols = [title_column()]
if _has_metadata(rows, "room_id"):
cols.append(metadata_column("room_id", "Room ID"))
if _has_metadata(rows, "room_name"):
cols.append(metadata_column("room_name", "Name"))
return cols
def _selection_fn(row: ResultModel) -> List[str]:
"""Return selection args for @N expansion and room selection.
Uses explicit -room-id flag to identify the selected room for file uploads.
"""
metadata = row.metadata or {}
# Check for explicit selection args first
args = metadata.get("_selection_args") or metadata.get("selection_args")
if isinstance(args, (list, tuple)) and args:
return [str(x) for x in args if x is not None]
# Fallback to room_id
room_id = metadata.get("room_id")
if room_id:
return ["-room-id", str(room_id)]
return ["-title", row.title or ""]
register_plugin(
"matrix",
_adapter,
columns=_columns_factory,
selection_fn=_selection_fn,
metadata={"description": "Matrix room provider with file uploads"},
)
except Exception:
# best-effort registration
pass
File diff suppressed because it is too large Load Diff
-459
View File
@@ -1,459 +0,0 @@
from __future__ import annotations
import sys
import hashlib
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log
from SYS.utils import format_bytes
def _get_podcastindex_credentials(config: Dict[str, Any]) -> Tuple[str, str]:
provider = config.get("provider")
if not isinstance(provider, dict):
return "", ""
entry = provider.get("podcastindex")
if not isinstance(entry, dict):
return "", ""
key = entry.get("key") or entry.get("Key") or entry.get("api_key")
secret = entry.get("secret") or entry.get("Secret") or entry.get("api_secret")
key_str = str(key or "").strip()
secret_str = str(secret or "").strip()
return key_str, secret_str
class PodcastIndex(Provider):
"""Search provider for PodcastIndex.org."""
TABLE_AUTO_STAGES = {
"podcastindex": ["download-file"],
"podcastindex.episodes": ["download-file"],
}
@staticmethod
def _format_duration(value: Any) -> str:
def _to_seconds(v: Any) -> Optional[int]:
if v is None:
return None
if isinstance(v, (int, float)):
try:
return max(0, int(v))
except Exception:
return None
if isinstance(v, str):
text = v.strip()
if not text:
return None
if text.isdigit():
try:
return max(0, int(text))
except Exception:
return None
# Accept common clock formats too.
if ":" in text:
parts = [p.strip() for p in text.split(":") if p.strip()]
if len(parts) == 2 and all(p.isdigit() for p in parts):
m, s = parts
return max(0, int(m) * 60 + int(s))
if len(parts) == 3 and all(p.isdigit() for p in parts):
h, m, s = parts
return max(0, int(h) * 3600 + int(m) * 60 + int(s))
return None
total = _to_seconds(value)
if total is None:
return "" if value is None else str(value).strip()
h = total // 3600
m = (total % 3600) // 60
s = total % 60
if h > 0:
return f"{h:d}h{m:d}m{s:d}s"
if m > 0:
return f"{m:d}m{s:d}s"
return f"{s:d}s"
@staticmethod
def _format_bytes(value: Any) -> str:
"""Format bytes using centralized utility."""
return format_bytes(value)
@staticmethod
def _format_date_from_epoch(value: Any) -> str:
if value is None:
return ""
try:
import datetime
ts = int(value)
if ts <= 0:
return ""
return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d")
except Exception:
return ""
@staticmethod
def _extract_episode_categories(ep: Dict[str, Any]) -> List[str]:
cats = ep.get("categories") or ep.get("category")
out: List[str] = []
if isinstance(cats, dict):
for v in cats.values():
if isinstance(v, str):
t = v.strip()
if t:
out.append(t)
elif isinstance(cats, list):
for v in cats:
if isinstance(v, str):
t = v.strip()
if t:
out.append(t)
elif isinstance(cats, str):
t = cats.strip()
if t:
out.append(t)
# Keep the table readable.
dedup: List[str] = []
seen: set[str] = set()
for t in out:
low = t.lower()
if low in seen:
continue
seen.add(low)
dedup.append(t)
return dedup
@staticmethod
def _looks_like_episode(item: Any) -> bool:
if not isinstance(item, dict):
return False
md = item.get("full_metadata")
if not isinstance(md, dict):
return False
enc = md.get("enclosureUrl") or md.get("enclosure_url")
if isinstance(enc, str) and enc.strip().startswith("http"):
return True
# Some pipelines may flatten episode fields.
enc2 = item.get("enclosureUrl") or item.get("url")
return isinstance(enc2, str) and enc2.strip().startswith("http")
@staticmethod
def _compute_sha256(filepath: Path) -> str:
h = hashlib.sha256()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(1024 * 1024), b""):
h.update(chunk)
return h.hexdigest()
def selector(
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any,
) -> bool:
if not stage_is_last:
return False
if not selected_items:
return False
# Episode selection (terminal): download episodes to temp/output dir.
if all(self._looks_like_episode(x) for x in selected_items):
return self._handle_episode_download_selection(selected_items, ctx)
# Podcast selection (terminal): expand into episode list.
return self._handle_podcast_expand_selection(selected_items, ctx)
def _handle_podcast_expand_selection(self, selected_items: List[Any], ctx: Any) -> bool:
chosen: List[Dict[str, Any]] = [x for x in (selected_items or []) if isinstance(x, dict)]
if not chosen:
return False
key, secret = _get_podcastindex_credentials(self.config or {})
if not key or not secret:
return False
# Resolve feed id/url from the selected podcast row.
item0 = chosen[0]
feed_md = item0.get("full_metadata") if isinstance(item0.get("full_metadata"), dict) else {}
feed_title = str(item0.get("title") or feed_md.get("title") or "Podcast").strip() or "Podcast"
feed_id = None
try:
feed_id = int(feed_md.get("id")) if feed_md.get("id") is not None else None
except Exception:
feed_id = None
feed_url = str(feed_md.get("url") or item0.get("path") or "").strip()
try:
from API.podcastindex import PodcastIndexClient
client = PodcastIndexClient(key, secret)
if feed_id:
episodes = client.episodes_byfeedid(feed_id, max_results=200)
else:
episodes = client.episodes_byfeedurl(feed_url, max_results=200)
except Exception as exc:
log(f"[podcastindex] episode lookup failed: {exc}", file=sys.stderr)
return True
try:
from SYS.result_table import Table
from SYS.rich_display import stdout_console
except Exception:
return True
table = Table(f"PodcastIndex Episodes: {feed_title}")._perseverance(True)
table.set_table("podcastindex.episodes")
try:
table.set_value_case("preserve")
except Exception:
pass
results_payload: List[Dict[str, Any]] = []
for ep in episodes or []:
if not isinstance(ep, dict):
continue
ep_title = str(ep.get("title") or "").strip() or "Unknown"
enc_url = str(ep.get("enclosureUrl") or "").strip()
page_url = str(ep.get("link") or "").strip()
audio_url = enc_url or page_url
if not audio_url:
continue
duration = ep.get("duration")
size_bytes = ep.get("enclosureLength") or ep.get("enclosure_length")
published = ep.get("datePublished") or ep.get("datePublishedPretty")
published_text = self._format_date_from_epoch(published) or str(published or "").strip()
sr = SearchResult(
table="podcastindex",
title=ep_title,
path=audio_url,
detail=feed_title,
media_kind="audio",
size_bytes=int(size_bytes) if str(size_bytes or "").isdigit() else None,
columns=[
("Title", ep_title),
("Date", published_text),
("Duration", self._format_duration(duration)),
("Size", self._format_bytes(size_bytes)),
("Url", audio_url),
],
full_metadata={
**dict(ep),
"_feed": dict(feed_md) if isinstance(feed_md, dict) else {},
},
)
table.add_result(sr)
results_payload.append(sr.to_dict())
try:
ctx.set_last_result_table(table, results_payload)
ctx.set_current_stage_table(table)
except Exception:
pass
try:
stdout_console().print()
stdout_console().print(table)
except Exception:
pass
return True
def _handle_episode_download_selection(self, selected_items: List[Any], ctx: Any) -> bool:
key, secret = _get_podcastindex_credentials(self.config or {})
if not key or not secret:
return False
try:
from SYS.config import resolve_output_dir
output_dir = resolve_output_dir(self.config or {})
except Exception:
import tempfile
output_dir = Path(tempfile.gettempdir())
try:
output_dir = Path(output_dir).expanduser()
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
from API.HTTP import _download_direct_file
except Exception:
return True
payloads: List[Dict[str, Any]] = []
downloaded = 0
for item in selected_items:
if not isinstance(item, dict):
continue
md = item.get("full_metadata") if isinstance(item.get("full_metadata"), dict) else {}
enc_url = str(md.get("enclosureUrl") or item.get("url") or item.get("path") or "").strip()
if not enc_url or not enc_url.startswith("http"):
continue
title_hint = str(item.get("title") or md.get("title") or "episode").strip() or "episode"
try:
result_obj = _download_direct_file(
enc_url,
Path(output_dir),
quiet=False,
suggested_filename=title_hint,
)
except Exception as exc:
log(f"[podcastindex] download failed: {exc}", file=sys.stderr)
continue
downloaded_path = None
try:
downloaded_path = getattr(result_obj, "filepath", None)
except Exception:
downloaded_path = None
if downloaded_path is None:
try:
downloaded_path = getattr(result_obj, "file_path", None)
except Exception:
downloaded_path = None
if downloaded_path is None:
try:
downloaded_path = getattr(result_obj, "path", None)
except Exception:
downloaded_path = None
try:
local_path = Path(str(downloaded_path))
except Exception:
local_path = None
if local_path is None or not local_path.exists():
continue
sha256 = ""
try:
sha256 = self._compute_sha256(local_path)
except Exception:
sha256 = ""
tags: List[str] = []
tags.append(f"title:{title_hint}")
cats = self._extract_episode_categories(md) if isinstance(md, dict) else []
for c in cats[:10]:
tags.append(f"tag:{c}")
payload: Dict[str, Any] = {
"path": str(local_path),
"hash": sha256,
"title": title_hint,
"action": "provider:podcastindex.selector",
"download_mode": "file",
"store": "local",
"media_kind": "audio",
"tag": tags,
"provider": "podcastindex",
"url": enc_url,
}
if isinstance(md, dict) and md:
payload["full_metadata"] = dict(md)
payloads.append(payload)
downloaded += 1
try:
if payloads and hasattr(ctx, "set_last_result_items_only"):
ctx.set_last_result_items_only(payloads)
except Exception:
pass
if downloaded <= 0:
return True
try:
from SYS.rich_display import stdout_console
stdout_console().print(f"Downloaded {downloaded} episode(s) -> {output_dir}")
except Exception:
pass
return True
def validate(self) -> bool:
key, secret = _get_podcastindex_credentials(self.config or {})
return bool(key and secret)
def search(
self,
query: str,
limit: int = 10,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
_ = filters
_ = kwargs
key, secret = _get_podcastindex_credentials(self.config or {})
if not key or not secret:
return []
try:
from API.podcastindex import PodcastIndexClient
client = PodcastIndexClient(key, secret)
feeds = client.search_byterm(query, max_results=limit)
except Exception as exc:
log(f"[podcastindex] search failed: {exc}", file=sys.stderr)
return []
results: List[SearchResult] = []
for feed in feeds[: max(0, int(limit))]:
if not isinstance(feed, dict):
continue
title = str(feed.get("title") or "").strip() or "Unknown"
author = str(feed.get("author") or feed.get("ownerName") or "").strip()
feed_url = str(feed.get("url") or "").strip()
site_url = str(feed.get("link") or "").strip()
language = str(feed.get("language") or "").strip()
episode_count_val = feed.get("episodeCount")
episode_count = ""
if episode_count_val is not None:
try:
episode_count = str(int(episode_count_val))
except Exception:
episode_count = str(episode_count_val).strip()
path = feed_url or site_url or str(feed.get("id") or "").strip()
columns = [
("Title", title),
("Author", author),
("Episodes", episode_count),
("Lang", language),
("Feed", feed_url),
]
results.append(
SearchResult(
table="podcastindex",
title=title,
path=path,
detail=author,
media_kind="audio",
columns=columns,
full_metadata=dict(feed),
)
)
return results
-888
View File
@@ -1,888 +0,0 @@
from __future__ import annotations
import asyncio
import contextlib
import io
import logging
import os
import re
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
from SYS.models import ProgressBar
_SOULSEEK_NOISE_SUBSTRINGS = (
"search reply ticket does not match any search request",
"failed to receive transfer ticket on file connection",
"aioslsk.exceptions.ConnectionReadError",
)
@contextlib.asynccontextmanager
async def _suppress_aioslsk_asyncio_task_noise() -> Any:
"""Suppress non-fatal aioslsk task exceptions emitted via asyncio's loop handler.
aioslsk may spawn background tasks (e.g. direct peer connection attempts) that
can fail with ConnectionFailedError. These are often expected and should not
end a successful download with a scary "Task exception was never retrieved"
traceback.
We only swallow those specific cases and delegate everything else to the
previous/default handler.
"""
try:
loop = asyncio.get_running_loop()
except RuntimeError:
# Not in an event loop.
yield
return
previous_handler = loop.get_exception_handler()
def _handler(loop: asyncio.AbstractEventLoop, context: Dict[str, Any]) -> None:
try:
exc = context.get("exception")
msg = str(context.get("message") or "")
if exc is not None:
# Suppress internal asyncio AssertionError on Windows teardown (Proactor loop)
if isinstance(exc, AssertionError):
m_lower = msg.lower()
if "proactor" in m_lower or "_start_serving" in m_lower or "self._sockets is not None" in str(exc):
return
# Only suppress un-retrieved task exceptions from aioslsk connection failures.
if msg == "Task exception was never retrieved":
cls = getattr(exc, "__class__", None)
name = getattr(cls, "__name__", "")
mod = getattr(cls, "__module__", "")
# Suppress ConnectionFailedError from aioslsk
if name == "ConnectionFailedError" and str(mod).startswith("aioslsk"):
return
except Exception:
# If our filter logic fails, fall through to default handling.
pass
if previous_handler is not None:
previous_handler(loop, context)
else:
loop.default_exception_handler(context)
loop.set_exception_handler(_handler)
try:
yield
finally:
try:
loop.set_exception_handler(previous_handler)
except Exception:
pass
def _configure_aioslsk_logging() -> None:
"""Reduce aioslsk internal log noise.
Some aioslsk components emit non-fatal warnings/errors during high churn
(search + download + disconnect). We keep our own debug output, but push
aioslsk to ERROR and stop propagation so it doesn't spam the CLI.
"""
for name in (
"aioslsk",
"aioslsk.network",
"aioslsk.search",
"aioslsk.transfer",
"aioslsk.transfer.manager",
):
logger = logging.getLogger(name)
logger.setLevel(logging.ERROR)
logger.propagate = False
class _LineFilterStream(io.TextIOBase):
"""A minimal stream wrapper that filters known noisy lines.
It also suppresses entire traceback blocks when they contain known non-fatal
aioslsk noise (e.g. ConnectionReadError during peer init).
"""
def __init__(self, underlying: Any, suppress_substrings: tuple[str, ...]):
super().__init__()
self._underlying = underlying
self._suppress = suppress_substrings
self._buf = ""
self._in_tb = False
self._tb_lines: list[str] = []
self._tb_suppress = False
def writable(self) -> bool: # pragma: no cover
return True
def _should_suppress_line(self, line: str) -> bool:
return any(sub in line for sub in self._suppress)
def _flush_tb(self) -> None:
if not self._tb_lines:
return
if not self._tb_suppress:
for l in self._tb_lines:
try:
self._underlying.write(l + "\n")
except Exception:
pass
self._tb_lines = []
self._tb_suppress = False
self._in_tb = False
def write(self, s: str) -> int:
self._buf += str(s)
while "\n" in self._buf:
line, self._buf = self._buf.split("\n", 1)
self._handle_line(line)
return len(s)
def _handle_line(self, line: str) -> None:
# Start capturing tracebacks so we can suppress the whole block if it matches.
if not self._in_tb and line.startswith("Traceback (most recent call last):"):
self._in_tb = True
self._tb_lines = [line]
self._tb_suppress = False
return
if self._in_tb:
self._tb_lines.append(line)
if self._should_suppress_line(line):
self._tb_suppress = True
# End traceback block on blank line.
if line.strip() == "":
self._flush_tb()
return
# Non-traceback line
if self._should_suppress_line(line):
return
try:
self._underlying.write(line + "\n")
except Exception:
pass
def flush(self) -> None:
# Flush any pending traceback block.
if self._in_tb:
# If the traceback ends without a trailing blank line, decide here.
self._flush_tb()
if self._buf:
line = self._buf
self._buf = ""
if not self._should_suppress_line(line):
try:
self._underlying.write(line)
except Exception:
pass
try:
self._underlying.flush()
except Exception:
pass
@contextlib.contextmanager
def _suppress_aioslsk_noise() -> Any:
"""Temporarily suppress known aioslsk noise printed to stdout/stderr.
Opt out by setting DOWNLOW_SOULSEEK_VERBOSE=1.
"""
if os.environ.get("DOWNLOW_SOULSEEK_VERBOSE"):
yield
return
_configure_aioslsk_logging()
old_out, old_err = sys.stdout, sys.stderr
sys.stdout = _LineFilterStream(old_out, _SOULSEEK_NOISE_SUBSTRINGS)
sys.stderr = _LineFilterStream(old_err, _SOULSEEK_NOISE_SUBSTRINGS)
try:
yield
finally:
try:
sys.stdout.flush()
sys.stderr.flush()
except Exception:
pass
sys.stdout, sys.stderr = old_out, old_err
class Soulseek(Provider):
TABLE_AUTO_STAGES = {
"soulseek": ["download-file", "-plugin", "soulseek"],
}
"""Search provider for Soulseek P2P network."""
def selector(
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any,
) -> bool:
"""Handle Soulseek selection.
Currently defaults to download-file via TABLE_AUTO_STAGES, but this
hook allows for future 'Browse User' or 'Browse Folder' drill-down.
"""
if not stage_is_last:
return False
# If we wanted to handle drill-down (like Tidal.py) we would:
# 1. Fetch more data (e.g. user shares)
# 2. Create a new ResultTable
# 3. ctx.set_current_stage_table(new_table)
# 4. return True
return False
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
return [
{
"key": "username",
"label": "Soulseek Username",
"default": "",
"required": True
},
{
"key": "password",
"label": "Soulseek Password",
"default": "",
"required": True,
"secret": True
}
]
MUSIC_EXTENSIONS = {
".flac",
".mp3",
".m4a",
".aac",
".ogg",
".opus",
".wav",
".alac",
".wma",
".ape",
".aiff",
".dsf",
".dff",
".wv",
".tta",
".tak",
".ac3",
".dts",
}
# NOTE: These defaults preserve existing behavior.
USERNAME = "asjhkjljhkjfdsd334"
PASSWORD = "khhhg"
DOWNLOAD_DIR = None
MAX_WAIT_TRANSFER = 1200
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
try:
from SYS.config import get_soulseek_username, get_soulseek_password
user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
if user:
Soulseek.USERNAME = user
if pwd:
Soulseek.PASSWORD = pwd
except Exception:
pass
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
"""Download file from Soulseek."""
try:
full_metadata = result.full_metadata or {}
username = full_metadata.get("username")
filename = full_metadata.get("filename") or result.path
if not username or not filename:
# If we were invoked via generic download-file on a SearchResult
# that has minimal data (e.g. from table selection), try to rescue it.
if isinstance(result, SearchResult) and result.full_metadata:
username = result.full_metadata.get("username")
filename = result.full_metadata.get("filename")
if not username or not filename:
log(
f"[soulseek] Missing metadata for download: {result.title}",
file=sys.stderr
)
return None
# Cast to str for Mypy
username = str(username)
filename = str(filename)
# Use tempfile directory as default if generic path elements were passed or None.
if output_dir is None:
import tempfile
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
else:
target_dir = Path(output_dir)
if str(target_dir) in (".", "downloads", "Downloads"):
import tempfile
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
target_dir.mkdir(parents=True, exist_ok=True)
# This cmdlet stack is synchronous; use asyncio.run for clarity.
return asyncio.run(
download_soulseek_file(
username=username,
filename=filename,
output_dir=target_dir,
timeout=self.MAX_WAIT_TRANSFER,
)
)
except RuntimeError:
# If we're already inside an event loop (e.g., TUI), fall back to a
# dedicated loop in this thread.
loop = asyncio.new_event_loop()
try:
# Re-resolve target_dir inside rescue block just in case
if output_dir is None:
import tempfile
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
else:
target_dir = Path(output_dir)
if str(target_dir) in (".", "downloads", "Downloads"):
import tempfile
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
asyncio.set_event_loop(loop)
# Cast to str for Mypy
username_str = str(username)
filename_str = str(filename)
return loop.run_until_complete(
download_soulseek_file(
username=username_str,
filename=filename_str,
output_dir=target_dir,
timeout=self.MAX_WAIT_TRANSFER,
)
)
finally:
try:
loop.close()
except Exception:
pass
except Exception as exc:
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
return None
async def perform_search(self,
query: str,
timeout: float = 9.0,
limit: int = 50) -> List[Dict[str,
Any]]:
"""Perform async Soulseek search."""
from aioslsk.client import SoulSeekClient
from aioslsk.settings import CredentialsSettings, Settings
# Removed legacy os.makedirs(self.DOWNLOAD_DIR) - specific commands handle output dirs.
settings = Settings(
credentials=CredentialsSettings(
username=self.USERNAME,
password=self.PASSWORD
)
)
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
async with _suppress_aioslsk_asyncio_task_noise():
try:
await client.start()
await client.login()
except Exception as exc:
log(
f"[soulseek] Login failed: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []
try:
search_request = await client.searches.search(query)
await self._collect_results(search_request, timeout=timeout)
return self._flatten_results(search_request)[:limit]
except Exception as exc:
log(
f"[soulseek] Search error: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []
finally:
# Best-effort: try to cancel/close the search request before stopping
# the client to reduce stray reply spam.
try:
if "search_request" in locals() and search_request is not None:
cancel = getattr(search_request, "cancel", None)
if callable(cancel):
maybe = cancel()
if asyncio.iscoroutine(maybe):
await maybe
except Exception:
pass
try:
await client.stop()
except Exception:
pass
# Give Proactor/Windows loop a moment to drain internal buffers after stop.
try:
await asyncio.sleep(0.2)
except Exception:
pass
def _flatten_results(self, search_request: Any) -> List[dict]:
flat: List[dict] = []
for result in getattr(search_request, "results", []):
username = getattr(result, "username", "?")
def _add(file_data: Any) -> None:
flat.append({
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0)
})
for file_data in getattr(result, "shared_items", []):
_add(file_data)
for file_data in getattr(result, "locked_results", []):
_add(file_data)
return flat
async def _collect_results(
self,
search_request: Any,
timeout: float = 75.0
) -> None:
end = time.time() + timeout
last_count = 0
while time.time() < end:
current_count = len(getattr(search_request, "results", []))
if current_count > last_count:
debug(f"[soulseek] Got {current_count} result(s)...")
last_count = current_count
await asyncio.sleep(0.5)
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
# Ensure temp download dir structure exists, but don't create legacy ./downloads here.
import tempfile
base_tmp = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
base_tmp.mkdir(parents=True, exist_ok=True)
try:
flat_results = asyncio.run(
self.perform_search(query,
timeout=9.0,
limit=limit)
)
if not flat_results:
return []
music_results: List[dict] = []
for item in flat_results:
filename = item["filename"]
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""
if ext in self.MUSIC_EXTENSIONS:
music_results.append(item)
if not music_results:
return []
enriched_results: List[dict] = []
for item in music_results:
filename = item["filename"]
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""
display_name = filename.replace("\\", "/").split("/")[-1]
path_parts = filename.replace("\\", "/").split("/")
artist = path_parts[-3] if len(path_parts) >= 3 else ""
album = (
path_parts[-2] if len(path_parts) >= 3 else
(path_parts[-2] if len(path_parts) == 2 else "")
)
base_name = display_name.rsplit(
".",
1
)[0] if "." in display_name else display_name
track_num = ""
title = base_name
filename_artist = ""
match = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base_name)
if match:
track_num = match.group(1)
rest = match.group(2)
if " - " in rest:
filename_artist, title = rest.split(" - ", 1)
else:
title = rest
if filename_artist:
artist = filename_artist
enriched_results.append(
{
**item,
"artist": artist,
"album": album,
"title": title,
"track_num": track_num,
"ext": ext,
}
)
if filters:
artist_filter = (filters.get("artist", "") or "").lower()
album_filter = (filters.get("album", "") or "").lower()
track_filter = (filters.get("track", "") or "").lower()
if artist_filter or album_filter or track_filter:
filtered: List[dict] = []
for item in enriched_results:
if artist_filter and artist_filter not in item["artist"].lower(
):
continue
if album_filter and album_filter not in item["album"].lower():
continue
if track_filter and track_filter not in item["title"].lower():
continue
filtered.append(item)
enriched_results = filtered
enriched_results.sort(
key=lambda item: (item["ext"].lower() != ".flac", -item["size"])
)
results: List[SearchResult] = []
for item in enriched_results:
artist_display = item["artist"] if item["artist"] else "(no artist)"
album_display = item["album"] if item["album"] else "(no album)"
size_mb = int(item["size"] / 1024 / 1024)
columns = [
("Track",
item["track_num"] or "?"),
("Title",
item["title"][:40]),
("Artist",
artist_display[:32]),
("Album",
album_display[:32]),
("Size",
f"{size_mb} MB"),
]
results.append(
SearchResult(
table="soulseek",
title=item["title"],
path=item["filename"],
detail=f"{artist_display} - {album_display}",
annotations=[f"{size_mb} MB",
item["ext"].lstrip(".").upper()],
media_kind="audio",
size_bytes=item["size"],
columns=columns,
selection_action=["download-file", "-plugin", "soulseek"],
full_metadata={
"username": item["username"],
"filename": item["filename"],
"artist": item["artist"],
"album": item["album"],
"track_num": item["track_num"],
"ext": item["ext"],
"provider": "soulseek"
},
)
)
return results
except Exception as exc:
log(f"[soulseek] Search error: {exc}", file=sys.stderr)
return []
def validate(self) -> bool:
try:
from aioslsk.client import SoulSeekClient # noqa: F401
# Require configured credentials.
try:
from SYS.config import get_soulseek_username, get_soulseek_password
user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
return bool(user and pwd)
except Exception:
# Fall back to legacy class defaults if config helpers aren't available.
return bool(Soulseek.USERNAME and Soulseek.PASSWORD)
except ImportError:
return False
async def download_soulseek_file(
username: str,
filename: str,
output_dir: Optional[Path] = None,
timeout: int = 1200,
*,
client_username: Optional[str] = None,
client_password: Optional[str] = None,
) -> Optional[Path]:
"""Download a file from a Soulseek peer."""
try:
from aioslsk.client import SoulSeekClient
from aioslsk.settings import CredentialsSettings, Settings
from aioslsk.transfer.model import Transfer, TransferDirection
from aioslsk.transfer.state import TransferState
if output_dir is None:
import tempfile
output_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
local_filename = filename.replace("\\", "/").split("/")[-1]
output_user_dir = output_dir / username
output_user_dir.mkdir(parents=True, exist_ok=True)
output_path = output_user_dir / local_filename
if output_path.exists():
base = output_path.stem
ext = output_path.suffix
counter = 1
while output_path.exists():
output_path = output_user_dir / f"{base}_{counter}{ext}"
counter += 1
output_path = output_path.resolve()
login_user = (client_username or Soulseek.USERNAME or "").strip()
login_pass = (client_password or Soulseek.PASSWORD or "").strip()
if not login_user or not login_pass:
raise RuntimeError(
"Soulseek credentials not configured (set provider=soulseek username/password)"
)
settings = Settings(
credentials=CredentialsSettings(username=login_user,
password=login_pass)
)
async def _attempt_once(
attempt_num: int
) -> tuple[Optional[Path],
Any,
int,
float]:
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
async with _suppress_aioslsk_asyncio_task_noise():
try:
await client.start()
await client.login()
debug(f"[soulseek] Logged in as {login_user}")
log(
f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}",
file=sys.stderr,
)
debug(
f"[soulseek] Requesting download from {username}: {filename}"
)
transfer = await client.transfers.add(
Transfer(username,
filename,
TransferDirection.DOWNLOAD)
)
transfer.local_path = str(output_path)
await client.transfers.queue(transfer)
start_time = time.time()
last_progress_time = start_time
progress_bar = ProgressBar()
while not transfer.is_finalized():
elapsed = time.time() - start_time
if elapsed > timeout:
log(
f"[soulseek] Download timeout after {timeout}s",
file=sys.stderr
)
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
state_val = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
progress_bar.finish()
return None, state_val, bytes_done, elapsed
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
total_bytes = int(getattr(transfer, "filesize", 0) or 0)
now = time.time()
if now - last_progress_time >= 0.5:
progress_bar.update(
downloaded=bytes_done,
total=total_bytes if total_bytes > 0 else None,
label="download",
file=sys.stderr,
)
last_progress_time = now
await asyncio.sleep(1)
final_state = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
downloaded_path = (
Path(transfer.local_path)
if getattr(transfer,
"local_path",
None) else output_path
)
final_elapsed = time.time() - start_time
# Clear in-place progress bar.
progress_bar.finish()
# If a file was written, treat it as success even if state is odd.
try:
if downloaded_path.exists() and downloaded_path.stat(
).st_size > 0:
if final_state != TransferState.COMPLETE:
log(
f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
file=sys.stderr,
)
return (
downloaded_path,
final_state,
int(downloaded_path.stat().st_size),
final_elapsed,
)
except Exception:
pass
if final_state == TransferState.COMPLETE and downloaded_path.exists(
):
debug(f"[soulseek] Download complete: {downloaded_path}")
return (
downloaded_path,
final_state,
int(downloaded_path.stat().st_size),
final_elapsed,
)
fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0)
fail_total = int(getattr(transfer, "filesize", 0) or 0)
reason = getattr(transfer, "reason", None)
log(
f"[soulseek] Download failed: state={final_state} bytes={fail_bytes}/{fail_total} reason={reason}",
file=sys.stderr,
)
# Clean up 0-byte placeholder.
try:
if downloaded_path.exists() and downloaded_path.stat(
).st_size == 0:
downloaded_path.unlink(missing_ok=True)
except Exception:
pass
return None, final_state, fail_bytes, final_elapsed
finally:
try:
await client.stop()
except Exception:
pass
# Let cancellation/cleanup callbacks run while our exception handler is still installed.
# Increased to 0.2s for Windows Proactor loop stability.
try:
await asyncio.sleep(0.2)
except Exception:
pass
# Retry a couple times only for fast 0-byte failures (common transient case).
max_attempts = 3
for attempt in range(1, max_attempts + 1):
result_path, final_state, bytes_done, elapsed = await _attempt_once(attempt)
if result_path:
return result_path
should_retry = (bytes_done == 0) and (elapsed < 15.0)
if attempt < max_attempts and should_retry:
log(
f"[soulseek] Retrying after fast failure (state={final_state})",
file=sys.stderr
)
await asyncio.sleep(2)
continue
break
return None
except ImportError:
log(
"[soulseek] aioslsk not installed. Install with: pip install aioslsk",
file=sys.stderr
)
return None
except Exception as exc:
log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr)
return None
-1340
View File
File diff suppressed because it is too large Load Diff
-448
View File
@@ -1,448 +0,0 @@
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import requests
from API.requests_client import get_requests_session
from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log
try: # Preferred HTML parser
from lxml import html as lxml_html
except Exception: # pragma: no cover - optional
lxml_html = None # type: ignore
logger = logging.getLogger(__name__)
@dataclass
class TorrentInfo:
name: str
url: str
seeders: int
leechers: int
size: str
source: str
category: Optional[str] = None
uploader: Optional[str] = None
magnet: Optional[str] = None
@dataclass
class SearchParams:
name: str
category: Optional[str] = None
order_column: Optional[str] = None
order_ascending: bool = False
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
class Scraper:
def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
self.name = name
self.base = base_url.rstrip("/")
self.timeout = timeout
self.headers = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
)
}
self.params: Optional[SearchParams] = None
def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
self.params = params
results: List[TorrentInfo] = []
for page in range(1, max(1, pages) + 1):
try:
results.extend(self._get_page(page))
except Exception as exc:
debug(f"[{self.name}] page fetch failed: {exc}")
return results
def _get_page(self, page: int) -> List[TorrentInfo]:
url, payload = self._request_data(page)
try:
resp = get_requests_session().get(
url,
params=payload,
headers=self.headers,
timeout=self.timeout,
)
resp.raise_for_status()
return self._parse_search(resp)
except Exception as exc:
debug(f"[{self.name}] request failed: {exc}")
return []
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
return self.base, {}
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]: # pragma: no cover - interface
raise NotImplementedError
def _parse_detail(self, url: str) -> Optional[str]: # optional override
try:
resp = get_requests_session().get(url, headers=self.headers, timeout=self.timeout)
resp.raise_for_status()
return self._parse_detail_response(resp)
except Exception:
return None
def _parse_detail_response(self, response: requests.Response) -> Optional[str]: # pragma: no cover - interface
return None
@staticmethod
def _int_from_text(value: Any) -> int:
try:
return int(str(value).strip().replace(",", ""))
except Exception:
return 0
class NyaaScraper(Scraper):
def __init__(self) -> None:
super().__init__("nyaa.si", "https://nyaa.si")
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
params = self.params or SearchParams(name="")
payload = {
"p": page,
"q": params.name,
"c": params.category or "0_0",
"f": "0",
}
if params.order_column:
payload["s"] = params.order_column
payload["o"] = "asc" if params.order_ascending else "desc"
return f"{self.base}/", payload
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
if lxml_html is None:
return []
doc = lxml_html.fromstring(response.text)
rows = doc.xpath("//table//tbody/tr")
results: List[TorrentInfo] = []
for row in rows:
cells = row.xpath("./td")
if len(cells) < 7:
continue
category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
name_links = name_cell.xpath("./a")
name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
if name_tag is None:
continue
name = name_tag.get("title") or (name_tag.text_content() or "").strip()
url = name_tag.get("href") or ""
magnet_link = None
magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
if magnet_candidates:
magnet_link = magnet_candidates[0]
category_title = None
cat_titles = category_cell.xpath(".//a/@title")
if cat_titles:
category_title = cat_titles[0]
results.append(
TorrentInfo(
name=name,
url=f"{self.base}{url}",
seeders=self._int_from_text(seed_cell.text_content()),
leechers=self._int_from_text(leech_cell.text_content()),
size=(size_cell.text_content() or "").strip(),
source=self.name,
category=category_title,
magnet=magnet_link,
)
)
return results
class X1337Scraper(Scraper):
def __init__(self) -> None:
super().__init__("1337x.to", "https://1337x.to")
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
params = self.params or SearchParams(name="")
order = None
if params.order_column:
direction = "asc" if params.order_ascending else "desc"
order = f"{params.order_column}/{direction}"
category = params.category
name = requests.utils.quote(params.name)
if order and category:
path = f"/sort-category-search/{name}/{category}/{order}"
elif category:
path = f"/category-search/{name}/{category}"
elif order:
path = f"/sort-search/{name}/{order}"
else:
path = f"/search/{name}"
url = f"{self.base}{path}/{page}/"
return url, {}
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
if lxml_html is None:
return []
doc = lxml_html.fromstring(response.text)
rows = doc.xpath("//table//tbody/tr")
results: List[TorrentInfo] = []
for row in rows:
cells = row.xpath("./td")
if len(cells) < 6:
continue
name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell = cells
links = name_cell.xpath(".//a")
if len(links) < 2:
continue
torrent_path = links[1].get("href")
torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
info = TorrentInfo(
name=(links[1].text_content() or "").strip(),
url=torrent_url,
seeders=self._int_from_text(seeds_cell.text_content()),
leechers=self._int_from_text(leech_cell.text_content()),
size=(size_cell.text_content() or "").strip().replace(",", ""),
source=self.name,
uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
)
if not info.magnet:
info.magnet = self._parse_detail(info.url)
results.append(info)
return results
def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
if lxml_html is None:
return None
doc = lxml_html.fromstring(response.text)
links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
return links[0] if links else None
class YTSScraper(Scraper):
TRACKERS = "&tr=".join(
[
"udp://open.demonii.com:1337/announce",
"udp://tracker.opentrackr.org:1337/announce",
"udp://tracker.leechers-paradise.org:6969",
]
)
def __init__(self) -> None:
super().__init__("yts.mx", "https://yts.mx/api/v2")
self.headers = {}
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
params = self.params or SearchParams(name="")
payload = {
"limit": 50,
"page": page,
"query_term": params.name,
"sort_by": "seeds",
"order_by": "desc" if not params.order_ascending else "asc",
}
return f"{self.base}/list_movies.json", payload
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
results: List[TorrentInfo] = []
data = response.json()
if data.get("status") != "ok":
return results
movies = (data.get("data") or {}).get("movies") or []
for movie in movies:
torrents = movie.get("torrents") or []
if not torrents:
continue
tor = max(torrents, key=lambda t: t.get("seeds", 0))
name = movie.get("title") or "unknown"
info = TorrentInfo(
name=name,
url=str(movie.get("id") or ""),
seeders=int(tor.get("seeds", 0) or 0),
leechers=int(tor.get("peers", 0) or 0),
size=str(tor.get("size") or ""),
source=self.name,
category=(movie.get("genres") or [None])[0],
magnet=self._build_magnet(tor, name),
)
results.append(info)
return results
def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
return (
f"magnet:?xt=urn:btih:{torrent.get('hash')}"
f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
)
class ApiBayScraper(Scraper):
"""Scraper for apibay.org (The Pirate Bay API clone)."""
def __init__(self) -> None:
super().__init__("apibay.org", "https://apibay.org")
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
_ = page # single-page API
params = self.params or SearchParams(name="")
return f"{self.base}/q.php", {"q": params.name}
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
results: List[TorrentInfo] = []
try:
data = response.json()
except Exception:
return results
if not isinstance(data, list):
return results
for item in data:
if not isinstance(item, dict):
continue
name = str(item.get("name") or "").strip()
info_hash = str(item.get("info_hash") or "").strip()
if not name or not info_hash:
continue
magnet = self._build_magnet(info_hash, name)
seeders = self._int_from_text(item.get("seeders"))
leechers = self._int_from_text(item.get("leechers"))
size_raw = str(item.get("size") or "").strip()
size_fmt = self._format_size(size_raw)
results.append(
TorrentInfo(
name=name,
url=f"{self.base}/description.php?id={item.get('id')}",
seeders=seeders,
leechers=leechers,
size=size_fmt,
source=self.name,
category=str(item.get("category") or ""),
uploader=str(item.get("username") or ""),
magnet=magnet,
)
)
return results
@staticmethod
def _build_magnet(info_hash: str, name: str) -> str:
return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"
@staticmethod
def _format_size(size_raw: str) -> str:
try:
size_int = int(size_raw)
if size_int <= 0:
return size_raw
gb = size_int / (1024 ** 3)
if gb >= 1:
return f"{gb:.1f} GB"
mb = size_int / (1024 ** 2)
return f"{mb:.1f} MB"
except Exception:
return size_raw
class Torrent(Provider):
TABLE_AUTO_STAGES = {"torrent": ["download-file"]}
@property
def preserve_order(self) -> bool:
return True
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
super().__init__(config)
self.scrapers: List[Scraper] = []
# JSON APIs (no lxml dependency)
self.scrapers.append(ApiBayScraper())
self.scrapers.append(YTSScraper())
# HTML scrapers require lxml
if lxml_html is not None:
self.scrapers.append(NyaaScraper())
self.scrapers.append(X1337Scraper())
else:
log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)
def validate(self) -> bool:
return bool(self.scrapers)
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**_kwargs: Any,
) -> List[SearchResult]:
q = str(query or "").strip()
if not q:
return []
params = SearchParams(name=q, order_column="seeders", order_ascending=False)
results: List[TorrentInfo] = []
for scraper in self.scrapers:
try:
scraped = scraper.find(params, pages=1)
results.extend(scraped)
except Exception as exc:
debug(f"[torrent] scraper {scraper.name} failed: {exc}")
continue
results = sorted(results, key=lambda r: r.seeders, reverse=True)
if limit and limit > 0:
results = results[:limit]
out: List[SearchResult] = []
for item in results:
path = item.magnet or item.url
columns = [
("TITLE", item.name),
("Seeds", str(item.seeders)),
("Leechers", str(item.leechers)),
("Size", item.size or ""),
("Source", item.source),
]
if item.uploader:
columns.append(("Uploader", item.uploader))
md = {
"magnet": item.magnet,
"url": item.url,
"source": item.source,
"seeders": item.seeders,
"leechers": item.leechers,
"size": item.size,
}
if item.uploader:
md["uploader"] = item.uploader
out.append(
SearchResult(
table="torrent",
title=item.name,
path=path,
detail=f"Seeds:{item.seeders} | Size:{item.size}",
annotations=[item.source],
media_kind="other",
columns=columns,
full_metadata=md,
tag={"torrent"},
)
)
return out
-870
View File
@@ -1,870 +0,0 @@
"""Minimal Vimm provider: table-row parsing for display.
This minimal implementation focuses on fetching a Vimm search result page,
turning the vault table rows into SearchResults, and letting the CLI
auto-insert the download-file stage directly from the first table so that
Playwright-driven downloads happen without showing a nested detail table.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import parse_qsl, parse_qs, urljoin, urlparse, urlunparse, urlencode
from lxml import html as lxml_html
import base64
import re
from pathlib import Path
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult, parse_inline_query_arguments
from ProviderCore.inline_utils import resolve_filter
from SYS.logger import debug
from SYS.provider_helpers import TableProviderMixin
from tool.playwright import PlaywrightTool
class Vimm(TableProviderMixin, Provider):
"""Minimal provider for vimm.net vault listings using TableProvider mixin.
NOTES / HOW-TO (selection & auto-download):
- This provider exposes file rows on a detail page. Each file row includes
a `path` which is an absolute download URL (or a form action + mediaId).
- To make `@N` expansion robust (so users can do `@1 | add-file -store <x>`)
we ensure three things:
1) The ResultTable produced by the `selector()` sets `source_command` to
"download-file" (the canonical cmdlet for downloading files).
2) Each row carries explicit selection args: `['-url', '<full-url>']`.
Using an explicit `-url` flag avoids ambiguity during argument
parsing (some cmdlets accept positional URLs, others accept flags).
3) The CLI's expansion logic places selection args *before* plugin
source args (e.g., `-plugin vimm`) so the first positional token is
the intended URL (not an unknown flag like `-plugin`).
- Why this approach? Argument parsing treats the *first* unrecognized token
as a positional value (commonly interpreted as a URL). If a plugin
injects hints like `-plugin vimm` *before* a bare URL, the parser can
misinterpret `-plugin` as the URL, causing confusing attempts to
download `-plugin`. By using `-url` and ensuring the URL appears first
we avoid that class of bugs and make `@N` -> `download-file`/`add-file`
flows reliable.
The code below implements these choices (and contains inline comments
explaining specific decisions)."""
URL = ("https://vimm.net/vault/",)
URL_DOMAINS = ("vimm.net",)
def get_source_command(self, args_list: List[str]) -> Tuple[str, List[str]]:
return "search-file", ["-plugin", self.name]
REGION_CHOICES = [
{"value": "1", "text": "Argentina"},
{"value": "2", "text": "Asia"},
{"value": "3", "text": "Australia"},
{"value": "35", "text": "Austria"},
{"value": "31", "text": "Belgium"},
{"value": "4", "text": "Brazil"},
{"value": "5", "text": "Canada"},
{"value": "6", "text": "China"},
{"value": "38", "text": "Croatia"},
{"value": "7", "text": "Denmark"},
{"value": "8", "text": "Europe"},
{"value": "9", "text": "Finland"},
{"value": "10", "text": "France"},
{"value": "11", "text": "Germany"},
{"value": "12", "text": "Greece"},
{"value": "13", "text": "Hong Kong"},
{"value": "27", "text": "India"},
{"value": "33", "text": "Ireland"},
{"value": "34", "text": "Israel"},
{"value": "14", "text": "Italy"},
{"value": "15", "text": "Japan"},
{"value": "16", "text": "Korea"},
{"value": "30", "text": "Latin America"},
{"value": "17", "text": "Mexico"},
{"value": "18", "text": "Netherlands"},
{"value": "40", "text": "New Zealand"},
{"value": "19", "text": "Norway"},
{"value": "28", "text": "Poland"},
{"value": "29", "text": "Portugal"},
{"value": "20", "text": "Russia"},
{"value": "32", "text": "Scandinavia"},
{"value": "37", "text": "South Africa"},
{"value": "21", "text": "Spain"},
{"value": "22", "text": "Sweden"},
{"value": "36", "text": "Switzerland"},
{"value": "23", "text": "Taiwan"},
{"value": "39", "text": "Turkey"},
{"value": "41", "text": "United Arab Emirates"},
{"value": "24", "text": "United Kingdom"},
{"value": "25", "text": "USA"},
{"value": "26", "text": "World"},
]
QUERY_ARG_CHOICES = {
"system": [
"Atari2600",
"Atari5200",
"Atari7800",
"CDi",
"Dreamcast",
"GB",
"GBA",
"GBC",
"GG",
"GameCube",
"Genesis",
"Jaguar",
"JaguarCD",
"Lynx",
"SMS",
"NES",
"3DS",
"N64",
"DS",
"PS1",
"PS2",
"PS3",
"PSP",
"Saturn",
"32X",
"SegaCD",
"SNES",
"TG16",
"TGCD",
"VB",
"Wii",
"WiiWare",
"Xbox",
"Xbox360",
"X360-D",
],
"region": REGION_CHOICES,
}
# ProviderCore still looks for INLINE_QUERY_FIELD_CHOICES, so expose this
# mapping once and keep QUERY_ARG_CHOICES as the readable name we prefer.
INLINE_QUERY_FIELD_CHOICES = QUERY_ARG_CHOICES
# Table metadata/constants grouped near the table helpers below.
TABLE_AUTO_STAGES = {"vimm": ["download-file"]}
AUTO_STAGE_USE_SELECTION_ARGS = True
TABLE_SYSTEM_COLUMN = {"label": "Platform", "metadata_key": "system"}
def validate(self) -> bool:
return True
def search(self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs: Any) -> List[SearchResult]:
q = (query or "").strip()
if not q:
return []
base = "https://vimm.net/vault/"
normalized_filters: Dict[str, Any] = {}
for key, value in (filters or {}).items():
if key is None:
continue
normalized_filters[str(key).lower()] = value
system_value = normalized_filters.get("system") or normalized_filters.get("platform")
system_param = str(system_value or "").strip()
region_value = normalized_filters.get("region")
region_param = str(region_value or "").strip()
params = [("p", "list"), ("q", q)]
if system_param:
params.append(("system", system_param))
if region_param:
params.append(("region", region_param))
url = f"{base}?{urlencode(params)}"
debug(f"[vimm] search: query={q} url={url} filters={normalized_filters}")
try:
with HTTPClient(timeout=9.0) as client:
resp = client.get(url)
content = resp.content
except Exception as exc:
debug(f"[vimm] HTTP fetch failed: {exc}")
return []
try:
doc = lxml_html.fromstring(content)
except Exception as exc:
debug(f"[vimm] HTML parse failed: {exc}")
return []
xpaths = [
"//table//tbody/tr",
"//table//tr[td]",
"//div[contains(@class,'list-item')]",
"//div[contains(@class,'result')]",
"//li[contains(@class,'item')]",
]
rows = doc.xpath("//table//tr[td]")
results = self._build_results_from_rows(rows, url, system_param, limit)
if not results:
results = self.search_table_from_url(url, limit=limit, xpaths=xpaths)
self._ensure_system_column(results, system_param)
results = [self._apply_selection_defaults(r, referer=url, detail_url=getattr(r, "path", "")) for r in (results or [])]
debug(f"[vimm] results={len(results)}")
return results[: int(limit)]
def extract_query_arguments(self, query: str) -> Tuple[str, Dict[str, Any]]:
normalized, inline_args = parse_inline_query_arguments(query)
inline_args_norm: Dict[str, Any] = {}
for k, v in (inline_args or {}).items():
if k is None:
continue
key_norm = str(k).strip().lower()
if key_norm == "platform":
key_norm = "system"
inline_args_norm[key_norm] = v
filters = resolve_filter(self, inline_args_norm)
return normalized, filters
def _build_results_from_rows(
self,
rows: List[Any],
base_url: str,
system_value: Optional[str],
limit: int,
) -> List[SearchResult]:
out: List[SearchResult] = []
seen: set[str] = set()
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
key = str(system_column.get("metadata_key") or "system").strip()
if not key:
key = "system"
for tr in rows:
if len(out) >= limit:
break
rec = self._parse_table_row(tr, base_url, system_value)
if not rec:
continue
path = rec.get("path")
if not path or path in seen:
continue
seen.add(path)
columns = self._build_columns_from_record(rec)
if not columns:
continue
metadata: Dict[str, Any] = {"raw_record": rec, "detail_url": path, "referer": base_url}
if path:
metadata["_selection_args"] = ["-url", path]
platform_value = rec.get("platform")
if platform_value:
metadata[key] = platform_value
sr = SearchResult(
table="vimm",
title=rec.get("title") or "",
path=path,
detail="",
annotations=[],
media_kind="file",
size_bytes=None,
tag={"vimm"},
columns=columns,
full_metadata=metadata,
)
out.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=path))
return out
def _parse_table_row(self, tr: Any, base_url: str, system_value: Optional[str]) -> Dict[str, str]:
tds = tr.xpath("./td")
if not tds:
return {}
rec: Dict[str, str] = {}
title_anchor = tds[0].xpath('.//a[contains(@href,"/vault/")]') or []
if title_anchor:
el = title_anchor[0]
rec["title"] = (el.text_content() or "").strip()
href = el.get("href") or ""
rec["path"] = urljoin(base_url, href) if href else ""
if system_value:
rec["platform"] = str(system_value).strip().upper()
rec["region"] = self._flag_text_at(tds, 1)
rec["version"] = self._text_at(tds, 2)
rec["languages"] = self._text_at(tds, 3)
else:
raw_platform = (tds[0].text_content() or "").strip()
if raw_platform:
rec["platform"] = raw_platform.upper()
anchors = tds[1].xpath('.//a[contains(@href,"/vault/")]') or tds[1].xpath('.//a')
if not anchors:
return {}
el = anchors[0]
rec["title"] = (el.text_content() or "").strip()
href = el.get("href") or ""
rec["path"] = urljoin(base_url, href) if href else ""
rec["region"] = self._flag_text_at(tds, 2)
rec["version"] = self._text_at(tds, 3)
rec["languages"] = self._text_at(tds, 4)
return {k: v for k, v in rec.items() if v}
def _text_at(self, tds: List[Any], idx: int) -> str:
if idx < 0 or idx >= len(tds):
return ""
return (tds[idx].text_content() or "").strip()
def _flag_text_at(self, tds: List[Any], idx: int) -> str:
if idx < 0 or idx >= len(tds):
return ""
td = tds[idx]
imgs = td.xpath('.//img[contains(@class,"flag")]/@title')
if imgs:
return str(imgs[0]).strip()
return (td.text_content() or "").strip()
def _build_columns_from_record(self, rec: Dict[str, str]) -> List[Tuple[str, str]]:
title = rec.get("title") or ""
if not title:
return []
columns: List[Tuple[str, str]] = [("Title", title)]
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
label = str(system_column.get("label") or "Platform")
platform_value = rec.get("platform")
if platform_value:
columns.append((label, platform_value))
for key, friendly in (("region", "Region"), ("version", "Version"), ("languages", "Languages")):
value = rec.get(key)
if value:
columns.append((friendly, value))
return columns
def _apply_selection_defaults(self, sr: SearchResult, *, referer: Optional[str], detail_url: Optional[str]) -> SearchResult:
"""Attach selection metadata so @N expansion passes a usable URL first."""
try:
md = dict(getattr(sr, "full_metadata", {}) or {})
except Exception:
md = {}
path_val = str(getattr(sr, "path", "") or "")
if not path_val:
path_val = str(detail_url or "")
if path_val:
md.setdefault("_selection_args", ["-url", path_val])
md.setdefault("detail_url", detail_url or path_val)
if referer:
md.setdefault("referer", referer)
sr.full_metadata = md
return sr
def _ensure_system_column(self, results: List[SearchResult], system_value: Optional[str]) -> None:
if not results or not system_value:
return
label_value = str(system_value).strip()
if not label_value:
return
label_value = label_value.upper()
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
label_name = str(system_column.get("label") or "Platform").strip()
if not label_name:
label_name = "Platform"
normalized_label = label_name.strip().lower()
metadata_key = str(system_column.get("metadata_key") or "system").strip()
if not metadata_key:
metadata_key = "system"
for result in results:
try:
cols = getattr(result, "columns", None)
if isinstance(cols, list):
lowered = {str(name or "").strip().lower() for name, _ in cols}
if normalized_label not in lowered:
insert_pos = 1 if cols else 0
cols.insert(insert_pos, (label_name, label_value))
metadata = getattr(result, "full_metadata", None)
if isinstance(metadata, dict):
metadata.setdefault(metadata_key, label_value)
except Exception:
continue
def _parse_detail_doc(self, doc, base_url: str) -> List[Any]:
"""Parse a Vimm detail page (non-standard table layout) and return a list
of SearchResult or dict payloads suitable for `ResultTable.add_result()`.
The function extracts simple key/value rows and file download entries (anchors
or download forms) and returns property dicts first followed by file SearchResults.
"""
def _build_download_url(action_url: str, params: Dict[str, str]) -> str:
if not action_url:
return ""
if not params:
return action_url
cleaned = {k: str(v) for k, v in params.items() if v is not None and str(v) != ""}
if not cleaned:
return action_url
parsed = urlparse(action_url)
existing = dict(parse_qsl(parsed.query, keep_blank_values=True))
existing.update(cleaned)
query = urlencode(existing, doseq=True)
return urlunparse(parsed._replace(query=query))
try:
# Prefer the compact 'rounded' detail table when present
tables = doc.xpath('//table[contains(@class,"rounded") and contains(@class,"cellpadding1")]') or doc.xpath('//table[contains(@class,"rounded")]')
if not tables:
return []
tbl = tables[0]
trs = tbl.xpath('.//tr') or []
# Aggregate page properties into a mapping and create file rows with Title, Region, CRC, Version
props: Dict[str, Any] = {}
anchors_by_label: Dict[str, List[Dict[str, str]]] = {}
for tr in trs:
try:
if tr.xpath('.//hr'):
continue
tds = tr.xpath('./td')
if not tds:
continue
# Canvas-based title row (base64 encoded in data-v)
canvas = tr.xpath('.//canvas[@data-v]')
if canvas:
data_v = canvas[0].get('data-v') or ''
try:
raw = base64.b64decode(data_v)
txt = raw.decode('utf-8', errors='ignore').strip()
except Exception:
txt = (canvas[0].text_content() or '').strip()
if txt:
props['Title'] = txt
continue
label = (tds[0].text_content() or '').strip()
if not label:
continue
val_td = tds[-1]
# collect anchors under this label for later detection
anchors = val_td.xpath('.//a')
if anchors:
entries = []
for a in anchors:
entries.append({'text': (a.text_content() or '').strip(), 'href': a.get('href') or ''})
# try to capture any explicit span value (e.g., CRC) even if an anchor exists
span_data = val_td.xpath('.//span[@id]/text()')
if span_data:
props[label] = str(span_data[0]).strip()
else:
# fallback to direct text nodes excluding anchor text
txts = [t.strip() for t in val_td.xpath('.//text()') if t.strip()]
anchor_texts = [a.text_content().strip() for a in anchors if a.text_content()]
filtered = [t for t in txts if t not in anchor_texts]
if filtered:
props[label] = filtered[0]
anchors_by_label[label] = entries
continue
img_title = val_td.xpath('.//img/@title')
if img_title:
val = str(img_title[0]).strip()
else:
span_data = val_td.xpath('.//span[@id]/text()')
if span_data:
val = str(span_data[0]).strip()
else:
opt = val_td.xpath('.//select/option[@selected]/text()')
if opt:
val = str(opt[0]).strip()
else:
vt = val_td.xpath('.//div[@id="version_text"]/text()')
if vt:
val = vt[0].strip()
else:
val = (val_td.text_content() or '').strip()
props[label] = val
except Exception:
continue
# Download form handling: find action, mediaId, and dl_size
form = doc.xpath('//form[@id="dl_form"]')
action = ''
media_id = None
dl_size = None
form_inputs: Dict[str, str] = {}
download_url = ''
if form:
f = form[0]
action = f.get('action') or ''
if action.startswith('//'):
action = 'https:' + action
elif action.startswith('/'):
action = urljoin(base_url, action)
media_ids = f.xpath('.//input[@name="mediaId"]/@value')
media_id = media_ids[0] if media_ids else None
size_vals = doc.xpath('//td[@id="dl_size"]/text()')
dl_size = size_vals[0].strip() if size_vals else None
inputs = f.xpath('.//input[@name]')
for inp in inputs:
name = (inp.get('name') or '').strip()
if not name:
continue
form_inputs[name] = inp.get('value') or ''
download_url = _build_download_url(action, form_inputs)
file_results: List[SearchResult] = []
# Create file rows from anchors that look like downloads
for lbl, alist in anchors_by_label.items():
for a in alist:
href = a.get('href') or ''
txt = a.get('text') or ''
is_download_link = False
if href:
low = href.lower()
if 'p=download' in low or '/download' in low or '/dl' in low:
is_download_link = True
for ext in ('.zip', '.nes', '.gba', '.bin', '.7z', '.iso'):
if low.endswith(ext):
is_download_link = True
break
if txt and re.search(r"\.[a-z0-9]{1,5}$", txt, re.I):
is_download_link = True
if not is_download_link:
continue
title = txt or props.get('Title') or ''
path = urljoin(base_url, href) if href else ''
cols = [("Title", title), ("Region", props.get('Region', '')), ("CRC", props.get('CRC', '')), ("Version", props.get('Version', ''))]
if dl_size:
cols.append(("Size", dl_size))
metadata: Dict[str, Any] = {"raw_record": {"label": lbl}}
if base_url:
metadata["referer"] = base_url
metadata.setdefault("detail_url", base_url)
sr = SearchResult(table="vimm", title=title, path=path, detail="", annotations=[], media_kind="file", size_bytes=None, tag={"vimm"}, columns=cols, full_metadata=metadata)
file_results.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=base_url))
# If no explicit file anchors, but we have a form, create a single file entry using page properties
if not file_results and (media_id or action):
# Ensure CRC is captured even if earlier parsing missed it
if not props.get('CRC'):
try:
crc_vals = doc.xpath('//span[@id="data-crc"]/text()')
if crc_vals:
props['CRC'] = str(crc_vals[0]).strip()
except Exception:
pass
title = props.get('Title') or ''
cols = [("Title", title), ("Region", props.get('Region', '')), ("CRC", props.get('CRC', '')), ("Version", props.get('Version', ''))]
if dl_size:
cols.append(("Size", dl_size))
target_path = download_url or action or base_url
sr = SearchResult(
table="vimm",
title=title,
path=target_path,
detail="",
annotations=[],
media_kind="file",
size_bytes=None,
tag={"vimm"},
columns=cols,
full_metadata={
"mediaId": media_id,
"dl_action": action,
"download_url": download_url,
"form_params": dict(form_inputs),
"referer": base_url,
"raw_props": props,
},
)
file_results.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=base_url))
# Attach mediaId/dl_action to file rows
if file_results and (media_id or action):
for fi in file_results:
try:
fi.full_metadata = dict(getattr(fi, 'full_metadata', {}) or {})
if media_id:
fi.full_metadata['mediaId'] = media_id
if action:
fi.full_metadata['dl_action'] = action
if form_inputs:
fi.full_metadata.setdefault('form_params', dict(form_inputs))
if download_url:
fi.full_metadata['download_url'] = download_url
if dl_size and not any((k.lower() == 'size') for k, _ in getattr(fi, 'columns', [])):
fi.columns.append(("Size", dl_size))
except Exception:
continue
# Return only file rows (properties are attached as columns)
return file_results
except Exception:
return []
def _fetch_detail_rows(self, detail_url: str) -> List[SearchResult]:
"""Fetch the detail page for a selected row and return the parsed file rows."""
detail_url = str(detail_url or "").strip()
if not detail_url:
return []
try:
with HTTPClient(timeout=9.0) as client:
resp = client.get(detail_url)
doc = lxml_html.fromstring(resp.content)
except Exception as exc:
debug(f"[vimm] detail fetch failed: {exc}")
return []
return self._parse_detail_doc(doc, base_url=detail_url)
def _download_from_payload(self, payload: Dict[str, Any], output_dir: Path) -> Optional[Path]:
"""Download using the metadata/form data stored in a SearchResult payload."""
try:
d = payload or {}
fm = d.get("full_metadata") or {}
media_id = fm.get("mediaId") or fm.get("media_id")
base_action = fm.get("dl_action") or d.get("path") or ""
download_url = fm.get("download_url")
params = dict(fm.get("form_params") or {})
if media_id:
params.setdefault("mediaId", media_id)
target = download_url or base_action
if not target:
return None
if target.startswith("//"):
target = "https:" + target
# Avoid downloading HTML detail pages directly; let detail parsing handle them.
low_target = target.lower()
if ("vimm.net/vault" in low_target or "?p=list" in low_target) and not download_url and not media_id and not params:
return None
referer = fm.get("referer") or d.get("referer") or d.get("detail_url")
headers: Dict[str, str] = {}
if not referer:
try:
from SYS.pipeline import get_last_result_items
items = get_last_result_items() or []
try:
parsed_target = urlparse(target)
target_qs = parse_qs(parsed_target.query)
target_media = None
if isinstance(target_qs, dict):
target_media = (target_qs.get("mediaId") or target_qs.get("mediaid") or [None])[0]
if target_media is not None:
target_media = str(target_media)
except Exception:
target_media = None
found = None
for it in items:
try:
it_d = it if isinstance(it, dict) else (it.to_dict() if hasattr(it, "to_dict") else {})
fm2 = (it_d.get("full_metadata") or {}) if isinstance(it_d, dict) else {}
dl_cand = (fm2.get("download_url") or fm2.get("dl_action") or it_d.get("path"))
if target_media:
m2 = None
if isinstance(fm2, dict):
m2 = str(fm2.get("mediaId") or fm2.get("media_id") or "")
if m2 and m2 == target_media:
found = it_d
break
if dl_cand and str(dl_cand).strip() and (str(dl_cand).strip() == str(target).strip() or str(dl_cand) in str(target) or str(target) in str(dl_cand)):
found = it_d
break
except Exception:
continue
if found:
referer = (found.get("full_metadata") or {}).get("referer") or found.get("detail_url") or found.get("path")
except Exception:
referer = referer
if referer:
headers["Referer"] = str(referer)
headers_arg = headers or None
out_dir = Path(output_dir or Path("."))
out_dir.mkdir(parents=True, exist_ok=True)
filename_hint = str(d.get("title") or f"vimm_{media_id or 'download'}")
with HTTPClient(timeout=60.0) as client:
try:
if download_url:
resp = client.get(target, headers=headers_arg)
elif params:
resp = client.get(target, params=params, headers=headers_arg)
else:
resp = client.get(target, headers=headers_arg)
except Exception as exc_get:
try:
detail_url = referer or target
p = self._playwright_fetch(detail_url, out_dir, selector="form#dl_form button[type=submit]", timeout_sec=60)
if p:
debug(f"[vimm] downloaded via Playwright after get() error: {p}")
return p
except Exception as e:
debug(f"[vimm] Playwright download failed after get() error: {e}")
debug(f"[vimm] HTTP GET failed (network): {exc_get}")
return None
try:
resp.raise_for_status()
except Exception as exc:
try:
detail_url = referer or target
p = self._playwright_fetch(detail_url, out_dir, selector="form#dl_form button[type=submit]", timeout_sec=60)
if p:
debug(f"[vimm] downloaded via Playwright after HTTP error: {p}")
return p
except Exception as e:
debug(f"[vimm] Playwright download failed after HTTP error: {e}")
debug(f"[vimm] HTTP GET failed: {exc}")
return None
content = getattr(resp, "content", b"") or b""
cd = getattr(resp, "headers", {}).get("content-disposition", "") if hasattr(resp, "headers") else ""
m = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', cd)
if m:
fname = m.group(1) or m.group(2)
else:
fname = filename_hint
out_path = out_dir / str(fname)
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_bytes(content)
return out_path
except Exception as exc:
debug(f"[vimm] download failed: {exc}")
return None
def _playwright_fetch(self, detail_url: str, out_dir: Path, selector: str = "form#dl_form button[type=submit]", timeout_sec: int = 90) -> Optional[Path]:
"""Attempt a browser-driven download using the shared Playwright tool.
Playwright is a required runtime dependency for this operation; import
failures will surface at module import time rather than being silently
swallowed by per-call guards.
"""
# Prefer headful-first attempts for Vimm to mirror real browser behaviour
cfg = {}
try:
from SYS.config import load_config
cfg = load_config() or {}
except Exception:
cfg = {}
tool = PlaywrightTool(cfg)
result = tool.download_file(
detail_url,
selector=selector,
out_dir=out_dir,
timeout_sec=timeout_sec,
headless_first=False,
debug_mode=False,
)
if result.ok and result.path:
return result.path
debug(f"[vimm] playwright helper failed: {result.error}")
return None
def download(self, result: Any, output_dir: Path, progress_callback: Optional[Any] = None) -> Optional[Path]:
"""Download an item identified on a Vimm detail page."""
payload = result.to_dict() if hasattr(result, "to_dict") else (result if isinstance(result, dict) else {})
downloaded = self._download_from_payload(payload, output_dir)
if downloaded:
return downloaded
detail_url = str(payload.get("path") or "").strip()
if not detail_url:
return None
for row in self._fetch_detail_rows(detail_url):
detail_payload = row.to_dict() if hasattr(row, "to_dict") else (row if isinstance(row, dict) else {})
downloaded = self._download_from_payload(detail_payload, output_dir)
if downloaded:
return downloaded
return None
# Minimal provider registration
# Minimal provider registration
try:
from SYS.result_table_adapters import register_plugin
from SYS.result_table_api import ResultModel, title_column, metadata_column
def _convert_search_result_to_model(sr):
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
title = d.get("title") or ""
path = d.get("path") or None
columns = d.get("columns") or getattr(sr, "columns", None) or []
metadata: Dict[str, Any] = {}
for name, value in columns:
key = str(name or "").strip().lower()
if key in ("system", "region", "version", "languages", "size"):
metadata[key] = value
try:
fm = d.get("full_metadata") or {}
if isinstance(fm, dict):
for k, v in fm.items():
metadata[str(k).strip().lower()] = v
except Exception:
pass
return ResultModel(title=str(title), path=str(path) if path else None, ext=None, size_bytes=None, metadata=metadata, source="vimm")
def _adapter(items):
for it in items:
yield _convert_search_result_to_model(it)
def _columns_factory(rows):
cols = [title_column()]
md = lambda key: any((r.metadata or {}).get(key) for r in rows)
if md("system"):
cols.append(metadata_column("system", "system"))
if md("region"):
cols.append(metadata_column("region", "Region"))
if md("version"):
cols.append(metadata_column("version", "Version"))
if md("languages"):
cols.append(metadata_column("languages", "Languages"))
if md("size"):
cols.append(metadata_column("size", "Size"))
return cols
def _selection_fn(row):
# Return explicit URL selection args so `select -run-cmd` and `@N` expansion
# behave correctly when the downstream stage is a downloader (e.g., download-file).
# Using '-url' is explicit and avoids ambiguity during argument parsing.
if getattr(row, "path", None):
return ["-url", row.path]
return ["-title", row.title or ""]
register_plugin(
"vimm",
_adapter,
columns=_columns_factory,
selection_fn=_selection_fn,
metadata={"description": "Minimal Vimm provider"},
)
except Exception:
# best-effort registration
pass
-218
View File
@@ -1,218 +0,0 @@
from __future__ import annotations
import sys
from typing import Any, Dict, Iterable, List, Optional
from ProviderCore.base import Provider, SearchResult
from SYS.provider_helpers import TableProviderMixin
from SYS.logger import log
class YouTube(TableProviderMixin, Provider):
"""YouTube video search provider using yt_dlp.
This provider uses the new table system (strict ResultTable adapter pattern) for
consistent selection and auto-stage integration. It exposes videos as SearchResult
rows with metadata enrichment for:
- video_id: Unique YouTube video identifier
- uploader: Channel/creator name
- duration: Video length in seconds
- view_count: Number of views
- _selection_args: For @N expansion control and download-file routing
SELECTION FLOW:
1. User runs: search-file -plugin youtube "linux tutorial"
2. Results show video rows with uploader, duration, views
3. User selects a video: @1
4. Selection metadata routes to download-file with the YouTube URL
5. download-file uses yt_dlp to download the video
"""
TABLE_AUTO_STAGES = {
"youtube": ["download-file"],
}
# If the user provides extra args on the selection stage, forward them to download-file.
AUTO_STAGE_USE_SELECTION_ARGS = True
@property
def preserve_order(self) -> bool:
return True
def search(
self,
query: str,
limit: int = 10,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
# Use the yt_dlp Python module (installed via requirements.txt).
try:
import yt_dlp # type: ignore
ydl_opts: Dict[str,
Any] = {
"quiet": True,
"skip_download": True,
"extract_flat": True
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
search_query = f"ytsearch{limit}:{query}"
info = ydl.extract_info(search_query, download=False)
entries = info.get("entries") or []
results: List[SearchResult] = []
for video_data in entries[:limit]:
title = video_data.get("title", "Unknown")
video_id = video_data.get("id", "")
url = video_data.get(
"url"
) or f"https://youtube.com/watch?v={video_id}"
uploader = video_data.get("uploader", "Unknown")
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = (
f"{int(duration // 60)}:{int(duration % 60):02d}"
if duration else ""
)
views_str = f"{view_count:,}" if view_count else ""
results.append(
SearchResult(
table="youtube",
title=title,
path=url,
detail=f"By: {uploader}",
annotations=[duration_str,
f"{views_str} views"],
media_kind="video",
columns=[
("Title",
title),
("Uploader",
uploader),
("Duration",
duration_str),
("Views",
views_str),
],
full_metadata={
"video_id": video_id,
"uploader": uploader,
"duration": duration,
"view_count": view_count,
# Selection metadata for table system and @N expansion
"_selection_args": ["-url", url],
},
)
)
return results
except Exception:
log("[youtube] yt_dlp import failed", file=sys.stderr)
return []
def validate(self) -> bool:
try:
return True
except Exception:
return False
# Minimal provider registration for the new table system
try:
from SYS.result_table_adapters import register_plugin
from SYS.result_table_api import ResultModel, ColumnSpec, metadata_column, title_column
def _convert_search_result_to_model(sr: Any) -> ResultModel:
"""Convert YouTube SearchResult to ResultModel for strict table display."""
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
title = d.get("title") or ""
path = d.get("path") or None
columns = d.get("columns") or getattr(sr, "columns", None) or []
# Extract metadata from columns and full_metadata
metadata: Dict[str, Any] = {}
for name, value in columns:
key = str(name or "").strip().lower()
if key in ("uploader", "duration", "views", "video_id"):
metadata[key] = value
try:
fm = d.get("full_metadata") or {}
if isinstance(fm, dict):
for k, v in fm.items():
metadata[str(k).strip().lower()] = v
except Exception:
pass
return ResultModel(
title=str(title),
path=str(path) if path else None,
ext=None,
size_bytes=None,
metadata=metadata,
source="youtube"
)
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
"""Adapter to convert SearchResults to ResultModels."""
for it in items:
try:
yield _convert_search_result_to_model(it)
except Exception:
continue
def _has_metadata(rows: List[ResultModel], key: str) -> bool:
"""Check if any row has a given metadata key with a non-empty value."""
for row in rows:
md = row.metadata or {}
if key in md:
val = md[key]
if val is None:
continue
if isinstance(val, str) and not val.strip():
continue
return True
return False
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
"""Build column specifications from available metadata in rows."""
cols = [title_column()]
if _has_metadata(rows, "uploader"):
cols.append(metadata_column("uploader", "Uploader"))
if _has_metadata(rows, "duration"):
cols.append(metadata_column("duration", "Duration"))
if _has_metadata(rows, "views"):
cols.append(metadata_column("views", "Views"))
return cols
def _selection_fn(row: ResultModel) -> List[str]:
"""Return selection args for @N expansion and auto-download integration.
Uses explicit -url flag to ensure the YouTube URL is properly routed
to download-file for yt_dlp download processing.
"""
metadata = row.metadata or {}
# Check for explicit selection args first
args = metadata.get("_selection_args") or metadata.get("selection_args")
if isinstance(args, (list, tuple)) and args:
return [str(x) for x in args if x is not None]
# Fallback to direct URL
if row.path:
return ["-url", row.path]
return ["-title", row.title or ""]
register_plugin(
"youtube",
_adapter,
columns=_columns_factory,
selection_fn=_selection_fn,
metadata={"description": "YouTube video search using yt_dlp"},
)
except Exception:
# best-effort registration
pass
-1432
View File
File diff suppressed because it is too large Load Diff
-72
View File
@@ -1,72 +0,0 @@
from __future__ import annotations
import os
import sys
from typing import Any
from ProviderCore.base import Provider
from SYS.logger import log
class ZeroXZero(Provider):
"""File provider for 0x0.st."""
PLUGIN_NAME = "0x0"
PLUGIN_ALIASES = ("zeroxzero",)
def upload(self, file_path: str, **kwargs: Any) -> str:
from API.HTTP import HTTPClient
from SYS.models import ProgressFileReader
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
headers = {
"User-Agent": "Medeia-Macina/1.0"
}
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
try:
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(
handle,
total_bytes=total,
label="upload"
)
response = client.post(
"https://0x0.st",
files={
"file": wrapped
}
)
if response.status_code == 200:
uploaded_url = response.text.strip()
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(pipe_obj,
uploaded_url)
except Exception:
pass
return uploaded_url
raise Exception(
f"Upload failed: {response.status_code} - {response.text}"
)
except Exception as exc:
log(f"[0x0] Upload error: {exc}", file=sys.stderr)
raise
def validate(self) -> bool:
return True