df
This commit is contained in:
369
API/HTTP.py
369
API/HTTP.py
@@ -10,10 +10,24 @@ Provides synchronous and asynchronous HTTP operations with:
|
||||
|
||||
import httpx
|
||||
import asyncio
|
||||
from typing import Optional, Dict, Any, Callable, BinaryIO
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import re
|
||||
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote, urlparse, parse_qs
|
||||
import logging
|
||||
|
||||
from SYS.logger import debug, log
|
||||
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
|
||||
from SYS.utils import ensure_directory, sha256_file
|
||||
|
||||
try: # Optional; used for metadata extraction when available
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default configuration
|
||||
@@ -366,6 +380,359 @@ class HTTPClient:
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks.

    Filename resolution order: well-known query parameters, the last URL path
    segment, then a Content-Disposition header from a best-effort HEAD probe;
    an explicit ``suggested_filename`` overrides all of those.  Responses that
    look like HTML pages are rejected with ``DownloadError`` so web pages are
    never saved as "files".

    Args:
        url: Direct URL of the file to fetch.
        output_dir: Destination directory (created if missing).
        debug_logger: Optional structured logger; receives a
            "direct-file-downloaded" record on success and "exception"
            records on failure.
        quiet: Suppress progress and debug output when True.
        suggested_filename: Caller-preferred name; if it has an extension it
            wins outright, otherwise the detected extension is appended.
        pipeline_progress: Optional pipeline UI adapter, duck-typed via
            begin_transfer / update_transfer / finish_transfer.

    Returns:
        DownloadMediaResult with saved path, yt-dlp-style info dict, tags,
        source URL and SHA-256 hash (hash may be None on hashing failure).

    Raises:
        DownloadError: HTML page detected, no filename determinable, or the
            HTTP transfer failed.
    """

    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization: keep only the last path
        # component, replace control/invalid characters with spaces,
        # collapse whitespace, and strip trailing dots/spaces.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]

        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        # Avoid clobbering existing files: append " (1)", " (2)", ... and
        # fall back to a timestamp suffix after 10,000 collisions.
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    parsed_url = urlparse(url)
    url_path = parsed_url.path

    # 1) Filename from well-known query parameters, if any.
    filename: Optional[str] = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    # 2) Fall back to the last URL path segment.
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    # Drop any query-string residue that survived in the segment.
    if "?" in filename:
        filename = filename.split("?")[0]

    # 3) Best-effort HEAD probe for Content-Disposition / Content-Type.
    #    Failures here are non-fatal; we proceed with what we have.
    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""

            if content_disposition:
                # Handles both filename="..." and bare/RFC 5987 filename*= forms.
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            # NOTE(review): "(unknown)" looks like a lost f-string
                            # interpolation (presumably {filename}) — confirm.
                            debug(f"Filename from Content-Disposition: (unknown)")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)

    # 4) If the target smells like a web page (HTML content type or a
    #    script-like extension), probe with a streaming GET and reject HTML.
    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""

        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)

        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        # Probe errors are ignored; the real GET below will surface them.
        pass

    # 5) Caller-supplied name wins; borrow the detected extension if needed.
    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested

    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        has_ext = False

    # 6) No extension yet: derive one from the Content-Type, or bail on HTML.
    if filename and (not has_ext):
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }

        if ct in ext_by_ct:
            # NOTE(review): "(unknown)" looks like a lost f-string interpolation
            # (presumably {filename}) — confirm against version history.
            filename = f"(unknown){ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")

    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )

    file_path = _unique_path(output_dir / str(filename))

    # Prefer the pipeline's transfer UI when one is attached; otherwise use
    # a local stderr progress bar (unless quiet).
    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False

    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()

    # One-element list so nested closures can mutate the flag.
    transfer_started = [False]

    if not quiet:
        # NOTE(review): "(unknown)" looks like a lost f-string interpolation
        # (presumably {url} or {filename}) — confirm.
        debug(f"Direct download: (unknown)")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False

        def _maybe_begin_transfer(content_length: int) -> None:
            # Lazily open the pipeline transfer row on the first callback.
            if pipeline_progress is None or transfer_started[0]:
                return
            try:
                total_val: Optional[int] = (
                    int(content_length)
                    if isinstance(content_length, int) and content_length > 0
                    else None
                )
            except Exception:
                total_val = None
            try:
                if hasattr(pipeline_progress, "begin_transfer"):
                    pipeline_progress.begin_transfer(
                        label=str(filename or "download"),
                        total=total_val,
                    )
                    transfer_started[0] = True
            except Exception:
                return

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            # Fan progress out to the pipeline UI and/or the local bar;
            # progress failures must never abort the download itself.
            downloaded_bytes[0] = int(bytes_downloaded or 0)

            try:
                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
                    _maybe_begin_transfer(content_length)
                    total_val: Optional[int] = (
                        int(content_length)
                        if isinstance(content_length, int) and content_length > 0
                        else None
                    )
                    pipeline_progress.update_transfer(
                        label=str(filename or "download"),
                        completed=int(bytes_downloaded or 0),
                        total=total_val,
                    )
            except Exception:
                pass

            if progress_bar is not None:
                progress_bar.update(
                    downloaded=int(bytes_downloaded or 0),
                    total=int(content_length) if content_length and content_length > 0 else None,
                    label=str(filename or "download"),
                    file=sys.stderr,
                )

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time

        # Close out both progress surfaces (best-effort).
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass

        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s")

        # Build a minimal yt-dlp-style info dict for tag extraction.
        ext_out = ""
        try:
            ext_out = Path(str(filename)).suffix.lstrip(".")
        except Exception:
            ext_out = ""

        info: Dict[str, Any] = {
            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
            "ext": ext_out,
            "webpage_url": url,
        }

        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass

        tags: List[str] = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as exc:
                log(f"Error extracting tags: {exc}", file=sys.stderr)

        # No title tag produced: retry extraction with the filename as title.
        if not any(str(t).startswith("title:") for t in tags):
            info["title"] = str(filename)
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as exc:
                    log(f"Error extracting tags with filename: {exc}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        # HTTP-layer failure: tear down progress UIs, log, and re-raise
        # as the project's DownloadError.
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc

    except Exception as exc:
        # Any other failure: same teardown, with a traceback in the record.
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
# Back-compat alias: older call sites still import the pre-rename
# private name; keep it pointing at the public implementation.
_download_direct_file = download_direct_file
|
||||
|
||||
|
||||
class AsyncHTTPClient:
|
||||
"""Unified async HTTP client with asyncio support."""
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
from SYS.logger import log
|
||||
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
|
||||
@@ -18,8 +19,8 @@ import tempfile
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional, Sequence, Type, TypeVar, Union, cast
|
||||
from urllib.parse import urlsplit, urlencode, quote
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, cast
|
||||
from urllib.parse import urlsplit, urlencode, quote, urlunsplit, unquote
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -1828,3 +1829,742 @@ def download_hydrus_file(
|
||||
print_final_progress(filename, file_size, elapsed)
|
||||
|
||||
return downloaded
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Hydrus metadata helpers (moved from SYS.metadata)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _normalize_hash(value: Any) -> str:
|
||||
candidate = str(value or "").strip().lower()
|
||||
if not candidate:
|
||||
raise ValueError("Hydrus hash is required")
|
||||
if len(candidate) != 64 or any(ch not in "0123456789abcdef" for ch in candidate):
|
||||
raise ValueError("Hydrus hash must be a 64-character hex string")
|
||||
return candidate
|
||||
|
||||
|
||||
def _normalize_tag(tag: Any) -> Optional[str]:
|
||||
if tag is None:
|
||||
return None
|
||||
if isinstance(tag, str):
|
||||
candidate = tag.strip()
|
||||
else:
|
||||
candidate = str(tag).strip()
|
||||
return candidate or None
|
||||
|
||||
|
||||
def _dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
||||
if not tags:
|
||||
return []
|
||||
|
||||
namespace_to_tags: Dict[Optional[str], List[Tuple[int, str]]] = {}
|
||||
first_appearance: Dict[Optional[str], int] = {}
|
||||
|
||||
for idx, tag in enumerate(tags):
|
||||
namespace: Optional[str] = tag.split(":", 1)[0] if ":" in tag else None
|
||||
if namespace not in first_appearance:
|
||||
first_appearance[namespace] = idx
|
||||
if namespace not in namespace_to_tags:
|
||||
namespace_to_tags[namespace] = []
|
||||
namespace_to_tags[namespace].append((idx, tag))
|
||||
|
||||
result: List[Tuple[int, str]] = []
|
||||
for namespace, tag_list in namespace_to_tags.items():
|
||||
chosen_tag = tag_list[0][1] if keep_first else tag_list[-1][1]
|
||||
result.append((first_appearance[namespace], chosen_tag))
|
||||
|
||||
result.sort(key=lambda x: x[0])
|
||||
return [tag for _, tag in result]
|
||||
|
||||
|
||||
def _extract_tag_services(entry: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Collect per-service tag records from a Hydrus metadata entry.

    Returns a list of ``{"service_key", "service_name", "tags"}`` dicts.
    Records are merged case-insensitively by service key or name, tags are
    normalized, and each record keeps at most one tag per namespace.  Both
    the canonical ``service_keys_to_statuses_to_tags`` layout and looser
    per-service dict layouts are accepted; only status "0" (current) tags
    are taken from status maps.
    """
    tags_section = entry.get("tags")
    services: List[Dict[str, Any]] = []
    if not isinstance(tags_section, dict):
        return services
    names_map = tags_section.get("service_keys_to_names")
    if not isinstance(names_map, dict):
        names_map = {}

    def get_record(service_key: Optional[str], service_name: Optional[str]) -> Dict[str, Any]:
        # Find-or-create a service record, matching case-insensitively on
        # key first, then name, and back-filling whichever field is missing.
        key_lower = service_key.lower() if isinstance(service_key, str) else None
        name_lower = service_name.lower() if isinstance(service_name, str) else None
        for record in services:
            existing_key = record.get("service_key")
            if key_lower and isinstance(existing_key, str) and existing_key.lower() == key_lower:
                if service_name and not record.get("service_name"):
                    record["service_name"] = service_name
                return record
            existing_name = record.get("service_name")
            if name_lower and isinstance(existing_name, str) and existing_name.lower() == name_lower:
                if service_key and not record.get("service_key"):
                    record["service_key"] = service_key
                return record
        record = {
            "service_key": service_key,
            "service_name": service_name,
            "tags": [],
        }
        services.append(record)
        return record

    def _iter_current_status_lists(container: Any) -> Iterable[List[Any]]:
        # Yield only the "current" (status "0") tag lists from a status map,
        # or the container itself when it is already a flat list.
        if isinstance(container, dict):
            for status_key, tags_list in container.items():
                if str(status_key) != "0":
                    continue
                if isinstance(tags_list, list):
                    yield tags_list
        elif isinstance(container, list):
            yield container

    # Canonical Hydrus layout: service key -> status -> tag list.
    statuses_map = tags_section.get("service_keys_to_statuses_to_tags")
    if isinstance(statuses_map, dict):
        for service_key, status_map in statuses_map.items():
            record = get_record(service_key if isinstance(service_key, str) else None, names_map.get(service_key))
            for tags_list in _iter_current_status_lists(status_map):
                for tag in tags_list:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    # Bookkeeping keys that are not per-service tag containers.
    ignored_keys = {
        "service_keys_to_statuses_to_tags",
        "service_keys_to_statuses_to_display_tags",
        "service_keys_to_display_friendly_tags",
        "service_keys_to_names",
        "tag_display_types_to_namespaces",
        "namespace_display_string_lookup",
        "tag_display_decoration_colour_lookup",
    }

    # Looser layout: any remaining dict value is treated as one service.
    for key, service in tags_section.items():
        if key in ignored_keys:
            continue
        if isinstance(service, dict):
            service_key = service.get("service_key") or (key if isinstance(key, str) else None)
            service_name = service.get("service_name") or service.get("name") or names_map.get(service_key)
            record = get_record(service_key if isinstance(service_key, str) else None, service_name)
            storage = service.get("storage_tags") or service.get("statuses_to_tags") or service.get("tags")
            if isinstance(storage, dict):
                for tags_list in _iter_current_status_lists(storage):
                    for tag in tags_list:
                        normalized = _normalize_tag(tag)
                        if normalized:
                            record["tags"].append(normalized)
            elif isinstance(storage, list):
                for tag in storage:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    # Keep one tag per namespace within each service (first occurrence wins).
    for record in services:
        record["tags"] = _dedup_tags_by_namespace(record["tags"], keep_first=True)
    return services
||||
|
||||
|
||||
def _select_primary_tags(
|
||||
services: List[Dict[str, Any]],
|
||||
aggregated: List[str],
|
||||
prefer_service: Optional[str]
|
||||
) -> Tuple[Optional[str], List[str]]:
|
||||
prefer_lower = prefer_service.lower() if isinstance(prefer_service, str) else None
|
||||
if prefer_lower:
|
||||
for record in services:
|
||||
name = record.get("service_name")
|
||||
if isinstance(name, str) and name.lower() == prefer_lower and record["tags"]:
|
||||
return record.get("service_key"), record["tags"]
|
||||
for record in services:
|
||||
if record["tags"]:
|
||||
return record.get("service_key"), record["tags"]
|
||||
return None, aggregated
|
||||
|
||||
|
||||
def _derive_title(
|
||||
tags_primary: List[str],
|
||||
tags_aggregated: List[str],
|
||||
entry: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
for source in (tags_primary, tags_aggregated):
|
||||
for tag in source:
|
||||
namespace, sep, value = tag.partition(":")
|
||||
if sep and namespace and namespace.lower() == "title":
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
for key in (
|
||||
"title",
|
||||
"display_name",
|
||||
"pretty_name",
|
||||
"original_display_filename",
|
||||
"original_filename",
|
||||
):
|
||||
value = entry.get(key)
|
||||
if isinstance(value, str):
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
return None
|
||||
|
||||
|
||||
def _derive_clip_time(
|
||||
tags_primary: List[str],
|
||||
tags_aggregated: List[str],
|
||||
entry: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
namespaces = {"clip", "clip_time", "cliptime"}
|
||||
for source in (tags_primary, tags_aggregated):
|
||||
for tag in source:
|
||||
namespace, sep, value = tag.partition(":")
|
||||
if sep and namespace and namespace.lower() in namespaces:
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
clip_value = entry.get("clip_time")
|
||||
if isinstance(clip_value, str):
|
||||
cleaned_clip = clip_value.strip()
|
||||
if cleaned_clip:
|
||||
return cleaned_clip
|
||||
return None
|
||||
|
||||
|
||||
def _summarize_hydrus_entry(
    entry: Dict[str, Any],
    prefer_service: Optional[str]
) -> Tuple[Dict[str, Any], List[str], Optional[str], Optional[str], Optional[str]]:
    """Summarize one Hydrus metadata entry.

    Returns ``(summary, primary_tags, service_key, title, clip_time)`` where
    *summary* is a copy of *entry* augmented with derived title/clip_time
    (only when the entry lacks them), the chosen ``tag_service_key``,
    ``has_current_file_service`` and an ``is_local`` flag.
    """
    services = _extract_tag_services(entry)
    # Aggregate all service tags, deduplicated, preserving first-seen order.
    aggregated: List[str] = []
    seen: Set[str] = set()
    for record in services:
        for tag in record["tags"]:
            if tag not in seen:
                seen.add(tag)
                aggregated.append(tag)
    service_key, primary_tags = _select_primary_tags(services, aggregated, prefer_service)
    title = _derive_title(primary_tags, aggregated, entry)
    clip_time = _derive_clip_time(primary_tags, aggregated, entry)
    # Shallow copy so the caller's entry is never mutated.
    summary = dict(entry)
    if title and not summary.get("title"):
        summary["title"] = title
    if clip_time and not summary.get("clip_time"):
        summary["clip_time"] = clip_time
    summary["tag_service_key"] = service_key
    summary["has_current_file_service"] = _has_current_file_service(entry)
    if "is_local" not in summary:
        summary["is_local"] = bool(entry.get("is_local"))
    return summary, primary_tags, service_key, title, clip_time
|
||||
|
||||
|
||||
def _looks_like_hash(value: Any) -> bool:
|
||||
if not isinstance(value, str):
|
||||
return False
|
||||
candidate = value.strip().lower()
|
||||
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
|
||||
|
||||
|
||||
def _collect_relationship_hashes(payload: Any, accumulator: Set[str]) -> None:
    """Recursively walk *payload* (dicts/lists/tuples/sets of arbitrary
    depth) and add every hash-looking string to *accumulator* in place."""
    if isinstance(payload, dict):
        children: Iterable[Any] = payload.values()
    elif isinstance(payload, (list, tuple, set)):
        children = payload
    else:
        if isinstance(payload, str) and _looks_like_hash(payload):
            accumulator.add(payload)
        return
    for child in children:
        _collect_relationship_hashes(child, accumulator)
|
||||
|
||||
|
||||
def _generate_hydrus_url_variants(url: str) -> List[str]:
    """Generate plausible spellings of *url* for Hydrus URL lookups.

    Produces, in order: the URL itself, the opposite http/https scheme,
    a lowercased host, a trailing-slash toggle on the path, an unquoted
    path, and query/fragment-stripped forms.  Duplicates are dropped and
    order is significant (callers try variants in sequence).
    """
    seen: Set[str] = set()
    variants: List[str] = []

    def push(candidate: Optional[str]) -> None:
        # Append a stripped, non-empty, not-yet-seen candidate.
        if not candidate:
            return
        text = candidate.strip()
        if not text or text in seen:
            return
        seen.add(text)
        variants.append(text)

    push(url)
    try:
        parsed = urlsplit(url)
    except Exception:
        # Unparseable URL: the raw string is the only variant we can offer.
        return variants

    # Opposite scheme (http <-> https) only applies to web URLs.
    if parsed.scheme in {"http", "https"}:
        alternate_scheme = "https" if parsed.scheme == "http" else "http"
        push(urlunsplit((alternate_scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)))

    # Hostnames are case-insensitive; try the lowercase spelling.
    normalised_netloc = parsed.netloc.lower()
    if normalised_netloc and normalised_netloc != parsed.netloc:
        push(urlunsplit((parsed.scheme, normalised_netloc, parsed.path, parsed.query, parsed.fragment)))

    if parsed.path:
        # Toggle a trailing slash on the path.
        trimmed_path = parsed.path.rstrip("/")
        if trimmed_path != parsed.path:
            push(urlunsplit((parsed.scheme, parsed.netloc, trimmed_path, parsed.query, parsed.fragment)))
        else:
            push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path + "/", parsed.query, parsed.fragment)))
        # Percent-decoded path variant.
        unquoted_path = unquote(parsed.path)
        if unquoted_path != parsed.path:
            push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, parsed.query, parsed.fragment)))

    # Variants with the query string and fragment stripped entirely.
    if parsed.query or parsed.fragment:
        push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", "")))
        if parsed.path:
            unquoted_path = unquote(parsed.path)
            push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, "", "")))

    return variants
|
||||
|
||||
|
||||
def _build_hydrus_query(
|
||||
hashes: Optional[Sequence[str]],
|
||||
file_ids: Optional[Sequence[int]],
|
||||
include_relationships: bool,
|
||||
minimal: bool,
|
||||
) -> Dict[str, str]:
|
||||
query: Dict[str, str] = {}
|
||||
if hashes:
|
||||
query["hashes"] = json.dumps([_normalize_hash(h) for h in hashes])
|
||||
if file_ids:
|
||||
query["file_ids"] = json.dumps([int(fid) for fid in file_ids])
|
||||
if not query:
|
||||
raise ValueError("hashes or file_ids must be provided")
|
||||
query["include_service_keys_to_tags"] = json.dumps(True)
|
||||
query["include_tag_services"] = json.dumps(True)
|
||||
query["include_file_services"] = json.dumps(True)
|
||||
if include_relationships:
|
||||
query["include_file_relationships"] = json.dumps(True)
|
||||
if not minimal:
|
||||
extras = (
|
||||
"include_url",
|
||||
"include_size",
|
||||
"include_width",
|
||||
"include_height",
|
||||
"include_duration",
|
||||
"include_mime",
|
||||
"include_has_audio",
|
||||
"include_is_trashed",
|
||||
)
|
||||
for key in extras:
|
||||
query[key] = json.dumps(True)
|
||||
return query
|
||||
|
||||
|
||||
def _fetch_hydrus_entries(
    client: "HydrusNetwork",
    hashes: Optional[Sequence[str]],
    file_ids: Optional[Sequence[int]],
    include_relationships: bool,
    minimal: bool,
) -> List[Dict[str, Any]]:
    """Fetch file_metadata entries from Hydrus for the given hashes/ids.

    Returns only the dict-shaped entries from the response's "metadata"
    list; an empty list when nothing was requested or the response is
    malformed.  HydrusRequestError from the client propagates.
    """
    if not hashes and not file_ids:
        return []
    request = HydrusRequestSpec(
        method="GET",
        endpoint="/get_files/file_metadata",
        query=_build_hydrus_query(hashes, file_ids, include_relationships, minimal),
    )
    payload = client._perform_request(request)
    if not isinstance(payload, dict):
        return []
    metadata = payload.get("metadata")
    if not isinstance(metadata, list):
        return []
    return [item for item in metadata if isinstance(item, dict)]
|
||||
|
||||
|
||||
def _has_current_file_service(entry: Dict[str, Any]) -> bool:
|
||||
services = entry.get("file_services")
|
||||
if not isinstance(services, dict):
|
||||
return False
|
||||
current = services.get("current")
|
||||
if isinstance(current, dict):
|
||||
for value in current.values():
|
||||
if value:
|
||||
return True
|
||||
return False
|
||||
if isinstance(current, list):
|
||||
return len(current) > 0
|
||||
return False
|
||||
|
||||
|
||||
def _compute_file_flags(entry: Dict[str, Any]) -> Tuple[bool, bool, bool]:
|
||||
mime = entry.get("mime")
|
||||
mime_lower = mime.lower() if isinstance(mime, str) else ""
|
||||
is_video = mime_lower.startswith("video/")
|
||||
is_audio = mime_lower.startswith("audio/")
|
||||
is_deleted = bool(entry.get("is_trashed"))
|
||||
file_services = entry.get("file_services")
|
||||
if not is_deleted and isinstance(file_services, dict):
|
||||
deleted = file_services.get("deleted")
|
||||
if isinstance(deleted, dict) and deleted:
|
||||
is_deleted = True
|
||||
return is_video, is_audio, is_deleted
|
||||
|
||||
|
||||
def fetch_hydrus_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch and summarize metadata for one Hydrus file.

    Expects ``payload`` with: "hash" (64-hex) and/or "file_ids"/"file_id";
    required "api_url"; optional "access_key"; optional "options" dict with
    "prefer_service_name", "include_relationships", "minimal", "timeout".

    Returns a result dict with the summarized metadata, primary tags,
    derived title/clip_time, media flags, and (optionally) relationship
    data plus per-related-hash summaries.  A not-found lookup returns an
    ``{"error": "not_found", ...}`` dict instead of raising.

    Raises:
        ValueError: missing api_url, bad hash, or no hash/file id at all.
        RuntimeError: when the Hydrus API request fails.
    """
    hash_hex = None
    raw_hash_value = payload.get("hash")
    if raw_hash_value is not None:
        hash_hex = _normalize_hash(raw_hash_value)
    # Accept both a list of file ids and a scalar; non-numeric entries are
    # silently dropped.
    file_ids: List[int] = []
    raw_file_ids = payload.get("file_ids")
    if isinstance(raw_file_ids, (list, tuple, set)):
        for value in raw_file_ids:
            try:
                file_ids.append(int(value))
            except (TypeError, ValueError):
                continue
    elif raw_file_ids is not None:
        try:
            file_ids.append(int(raw_file_ids))
        except (TypeError, ValueError):
            file_ids = []
    # A singular "file_id" is merged in without duplicating.
    raw_file_id = payload.get("file_id")
    if raw_file_id is not None:
        try:
            coerced = int(raw_file_id)
        except (TypeError, ValueError):
            coerced = None
        if coerced is not None and coerced not in file_ids:
            file_ids.append(coerced)
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    prefer_service = options.get("prefer_service_name")
    if isinstance(prefer_service, str):
        prefer_service = prefer_service.strip()
    else:
        prefer_service = None
    include_relationships = bool(options.get("include_relationships"))
    minimal = bool(options.get("minimal"))
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    if hash_hex:
        hashes = [hash_hex]
    if not hashes and not file_ids:
        raise ValueError("Hydrus hash or file id is required")
    try:
        entries = _fetch_hydrus_entries(
            client,
            hashes,
            file_ids or None,
            include_relationships,
            minimal
        )
    except HydrusRequestError as exc:
        raise RuntimeError(str(exc))
    # Not found: return a structured "not_found" result rather than raising.
    if not entries:
        response: Dict[str, Any] = {
            "hash": hash_hex,
            "metadata": {},
            "tags": [],
            "warnings": [f"No Hydrus metadata for {hash_hex or file_ids}"],
            "error": "not_found",
        }
        if file_ids:
            response["file_id"] = file_ids[0]
        return response
    # Only the first entry is summarized.
    entry = entries[0]
    # Looked up by file id: back-fill the hash from the entry itself.
    if not hash_hex:
        entry_hash = entry.get("hash")
        if isinstance(entry_hash, str) and entry_hash:
            hash_hex = entry_hash
            hashes = [hash_hex]
    summary, primary_tags, service_key, title, clip_time = _summarize_hydrus_entry(entry, prefer_service)
    is_video, is_audio, is_deleted = _compute_file_flags(entry)
    has_current_file_service = _has_current_file_service(entry)
    is_local = bool(entry.get("is_local"))
    size_bytes = entry.get("size") or entry.get("file_size")
    filesize_mb = None
    if isinstance(size_bytes, (int, float)) and size_bytes > 0:
        filesize_mb = float(size_bytes) / (1024.0 * 1024.0)
    duration = entry.get("duration")
    # Some responses report duration in milliseconds instead of seconds.
    if duration is None and isinstance(entry.get("duration_ms"), (int, float)):
        duration = float(entry["duration_ms"]) / 1000.0
    warnings_list: List[str] = []
    if not primary_tags:
        warnings_list.append("No tags returned for preferred service")
    relationships = None
    relationship_metadata: Dict[str, Dict[str, Any]] = {}
    # Optional second round-trip: relationships, then minimal metadata for
    # every related hash.  Failures downgrade to warnings.
    if include_relationships and hash_hex:
        try:
            rel_spec = HydrusRequestSpec(
                method="GET",
                endpoint="/manage_file_relationships/get_file_relationships",
                query={"hash": hash_hex},
            )
            relationships = client._perform_request(rel_spec)
        except HydrusRequestError as exc:
            warnings_list.append(f"Relationship lookup failed: {exc}")
            relationships = None
        if isinstance(relationships, dict):
            related_hashes: Set[str] = set()
            _collect_relationship_hashes(relationships, related_hashes)
            related_hashes.discard(hash_hex)
            if related_hashes:
                try:
                    related_entries = _fetch_hydrus_entries(
                        client,
                        sorted(related_hashes),
                        None,
                        False,
                        True
                    )
                except HydrusRequestError as exc:
                    warnings_list.append(f"Relationship metadata fetch failed: {exc}")
                else:
                    for rel_entry in related_entries:
                        rel_hash = rel_entry.get("hash")
                        if not isinstance(rel_hash, str):
                            continue
                        rel_summary, rel_tags, _, rel_title, rel_clip = _summarize_hydrus_entry(rel_entry, prefer_service)
                        rel_summary["tags"] = rel_tags
                        if rel_title:
                            rel_summary["title"] = rel_title
                        if rel_clip:
                            rel_summary["clip_time"] = rel_clip
                        relationship_metadata[rel_hash] = rel_summary
    result: Dict[str, Any] = {
        "hash": entry.get("hash") or hash_hex,
        "metadata": summary,
        "tags": primary_tags,
        "tag_service_key": service_key,
        "title": title,
        "clip_time": clip_time,
        "duration": duration,
        "filesize_mb": filesize_mb,
        "is_video": is_video,
        "is_audio": is_audio,
        "is_deleted": is_deleted,
        "is_local": is_local,
        "has_current_file_service": has_current_file_service,
        "matched_hash": entry.get("hash") or hash_hex,
        "swap_recommended": False,
    }
    file_id_value = entry.get("file_id")
    if isinstance(file_id_value, (int, float)):
        result["file_id"] = int(file_id_value)
    if relationships is not None:
        result["relationships"] = relationships
    if relationship_metadata:
        result["relationship_metadata"] = relationship_metadata
    if warnings_list:
        result["warnings"] = warnings_list
    return result
|
||||
|
||||
|
||||
def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
raw_url = payload.get("url") or payload.get("source_url")
|
||||
url = str(raw_url or "").strip()
|
||||
if not url:
|
||||
raise ValueError("URL is required to fetch Hydrus metadata by URL")
|
||||
base_url = str(payload.get("api_url") or "").strip()
|
||||
if not base_url:
|
||||
raise ValueError("Hydrus api_url is required")
|
||||
access_key = str(payload.get("access_key") or "").strip()
|
||||
options_raw = payload.get("options")
|
||||
options = options_raw if isinstance(options_raw, dict) else {}
|
||||
timeout = float(options.get("timeout") or 60.0)
|
||||
client = HydrusNetwork(base_url, access_key, timeout)
|
||||
hashes: Optional[List[str]] = None
|
||||
file_ids: Optional[List[int]] = None
|
||||
matched_url = None
|
||||
normalised_reported = None
|
||||
seen: Set[str] = set()
|
||||
queue = deque()
|
||||
for variant in _generate_hydrus_url_variants(url):
|
||||
queue.append(variant)
|
||||
if not queue:
|
||||
queue.append(url)
|
||||
tried_variants: List[str] = []
|
||||
while queue:
|
||||
candidate = queue.popleft()
|
||||
candidate = str(candidate or "").strip()
|
||||
if not candidate or candidate in seen:
|
||||
continue
|
||||
seen.add(candidate)
|
||||
tried_variants.append(candidate)
|
||||
spec = HydrusRequestSpec(
|
||||
method="GET",
|
||||
endpoint="/add_urls/get_url_files",
|
||||
query={"url": candidate},
|
||||
)
|
||||
try:
|
||||
response = client._perform_request(spec)
|
||||
except HydrusRequestError as exc:
|
||||
raise RuntimeError(str(exc))
|
||||
response_hashes_list: List[str] = []
|
||||
response_file_ids_list: List[int] = []
|
||||
if isinstance(response, dict):
|
||||
normalised_value = response.get("normalised_url")
|
||||
if isinstance(normalised_value, str):
|
||||
trimmed = normalised_value.strip()
|
||||
if trimmed:
|
||||
normalised_reported = normalised_reported or trimmed
|
||||
if trimmed not in seen:
|
||||
queue.append(trimmed)
|
||||
for redirect_key in ("redirect_url", "url"):
|
||||
redirect_value = response.get(redirect_key)
|
||||
if isinstance(redirect_value, str):
|
||||
redirect_trimmed = redirect_value.strip()
|
||||
if redirect_trimmed and redirect_trimmed not in seen:
|
||||
queue.append(redirect_trimmed)
|
||||
raw_hashes = response.get("hashes") or response.get("file_hashes")
|
||||
if isinstance(raw_hashes, list):
|
||||
for item in raw_hashes:
|
||||
try:
|
||||
normalized = _normalize_hash(item)
|
||||
except ValueError:
|
||||
continue
|
||||
if normalized:
|
||||
response_hashes_list.append(normalized)
|
||||
raw_ids = response.get("file_ids") or response.get("file_id")
|
||||
if isinstance(raw_ids, list):
|
||||
for item in raw_ids:
|
||||
try:
|
||||
response_file_ids_list.append(int(item))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
elif raw_ids is not None:
|
||||
try:
|
||||
response_file_ids_list.append(int(raw_ids))
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
statuses = response.get("url_file_statuses")
|
||||
if isinstance(statuses, list):
|
||||
for entry in statuses:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
status_hash = entry.get("hash") or entry.get("file_hash")
|
||||
if status_hash:
|
||||
try:
|
||||
normalized = _normalize_hash(status_hash)
|
||||
except ValueError:
|
||||
normalized = None
|
||||
if normalized:
|
||||
response_hashes_list.append(normalized)
|
||||
status_id = entry.get("file_id") or entry.get("fileid")
|
||||
if status_id is not None:
|
||||
try:
|
||||
response_file_ids_list.append(int(status_id))
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if not hashes and response_hashes_list:
|
||||
hashes = response_hashes_list
|
||||
if not file_ids and response_file_ids_list:
|
||||
file_ids = response_file_ids_list
|
||||
if hashes or file_ids:
|
||||
matched_url = candidate
|
||||
break
|
||||
if not hashes and not file_ids:
|
||||
raise RuntimeError(
|
||||
"No Hydrus matches for URL variants: "
|
||||
+ ", ".join(tried_variants)
|
||||
)
|
||||
followup_payload = {
|
||||
"api_url": base_url,
|
||||
"access_key": access_key,
|
||||
"hash": hashes[0] if hashes else None,
|
||||
"file_ids": file_ids,
|
||||
"options": {"timeout": timeout, "minimal": True},
|
||||
}
|
||||
result = fetch_hydrus_metadata(followup_payload)
|
||||
result["matched_url"] = matched_url or url
|
||||
result["normalised_url"] = normalised_reported or matched_url or url
|
||||
result["tried_urls"] = tried_variants
|
||||
return result
|
||||
|
||||
|
||||
def _build_hydrus_context(payload: Dict[str, Any]) -> Tuple["HydrusNetwork", str, str, float, Optional[str]]:
|
||||
base_url = str(payload.get("api_url") or "").strip()
|
||||
if not base_url:
|
||||
raise ValueError("Hydrus api_url is required")
|
||||
access_key = str(payload.get("access_key") or "").strip()
|
||||
options_raw = payload.get("options")
|
||||
options = options_raw if isinstance(options_raw, dict) else {}
|
||||
timeout = float(options.get("timeout") or payload.get("timeout") or 60.0)
|
||||
prefer_service = payload.get("prefer_service_name") or options.get("prefer_service_name")
|
||||
if isinstance(prefer_service, str):
|
||||
prefer_service = prefer_service.strip() or None
|
||||
else:
|
||||
prefer_service = None
|
||||
client = HydrusNetwork(base_url, access_key, timeout)
|
||||
return client, base_url, access_key, timeout, prefer_service
|
||||
|
||||
|
||||
def _refetch_hydrus_summary(
|
||||
base_url: str,
|
||||
access_key: str,
|
||||
hash_hex: str,
|
||||
timeout: float,
|
||||
prefer_service: Optional[str]
|
||||
) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {
|
||||
"hash": hash_hex,
|
||||
"api_url": base_url,
|
||||
"access_key": access_key,
|
||||
"options": {
|
||||
"minimal": True,
|
||||
"include_relationships": False,
|
||||
"timeout": timeout,
|
||||
},
|
||||
}
|
||||
if prefer_service:
|
||||
payload["options"]["prefer_service_name"] = prefer_service
|
||||
return fetch_hydrus_metadata(payload)
|
||||
|
||||
|
||||
def apply_hydrus_tag_mutation(
|
||||
payload: Dict[str, Any],
|
||||
add: Iterable[Any],
|
||||
remove: Iterable[Any]
|
||||
) -> Dict[str, Any]:
|
||||
client, base_url, access_key, timeout, prefer_service = _build_hydrus_context(payload)
|
||||
hash_hex = _normalize_hash(payload.get("hash"))
|
||||
add_list = [_normalize_tag(tag) for tag in add if _normalize_tag(tag)]
|
||||
remove_list = [_normalize_tag(tag) for tag in remove if _normalize_tag(tag)]
|
||||
if not add_list and not remove_list:
|
||||
raise ValueError("No tag changes supplied")
|
||||
service_key = payload.get("service_key") or payload.get("tag_service_key")
|
||||
summary = None
|
||||
if not service_key:
|
||||
summary = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
|
||||
service_key = summary.get("tag_service_key")
|
||||
if not isinstance(service_key, str) or not service_key:
|
||||
raise RuntimeError("Unable to determine Hydrus tag service key")
|
||||
actions: Dict[str, List[str]] = {}
|
||||
if add_list:
|
||||
actions["0"] = [tag for tag in add_list if tag]
|
||||
if remove_list:
|
||||
actions["1"] = [tag for tag in remove_list if tag]
|
||||
if not actions:
|
||||
raise ValueError("Tag mutation produced no actionable changes")
|
||||
request_payload = {
|
||||
"hashes": [hash_hex],
|
||||
"service_keys_to_actions_to_tags": {
|
||||
service_key: actions,
|
||||
},
|
||||
}
|
||||
try:
|
||||
tag_spec = HydrusRequestSpec(
|
||||
method="POST",
|
||||
endpoint="/add_tags/add_tags",
|
||||
data=request_payload,
|
||||
)
|
||||
client._perform_request(tag_spec)
|
||||
except HydrusRequestError as exc:
|
||||
raise RuntimeError(str(exc))
|
||||
summary_after = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
|
||||
result = dict(summary_after)
|
||||
result["added_tags"] = actions.get("0", [])
|
||||
result["removed_tags"] = actions.get("1", [])
|
||||
result["tag_service_key"] = summary_after.get("tag_service_key")
|
||||
return result
|
||||
|
||||
27
CLI.py
27
CLI.py
@@ -845,21 +845,8 @@ class CmdletIntrospection:
|
||||
providers.keys()
|
||||
)
|
||||
|
||||
try:
|
||||
from Provider.metadata_provider import list_metadata_providers
|
||||
|
||||
meta_providers = list_metadata_providers(config) or {}
|
||||
meta_available = [n for n, ready in meta_providers.items() if ready]
|
||||
meta_choices = (
|
||||
sorted(meta_available)
|
||||
if meta_available else sorted(meta_providers.keys())
|
||||
)
|
||||
except Exception:
|
||||
meta_choices = []
|
||||
|
||||
merged = sorted(set(provider_choices + meta_choices))
|
||||
if merged:
|
||||
return merged
|
||||
if provider_choices:
|
||||
return provider_choices
|
||||
|
||||
if normalized_arg == "scrape":
|
||||
try:
|
||||
@@ -990,7 +977,15 @@ class CmdletCompleter(Completer):
|
||||
config=config
|
||||
)
|
||||
if choices:
|
||||
for choice in choices:
|
||||
choice_list = choices
|
||||
normalized_prev = prev_token.lstrip("-").strip().lower()
|
||||
if normalized_prev == "provider" and current_token:
|
||||
current_lower = current_token.lower()
|
||||
filtered = [c for c in choices if current_lower in c.lower()]
|
||||
if filtered:
|
||||
choice_list = filtered
|
||||
|
||||
for choice in choice_list:
|
||||
yield Completion(choice, start_position=-len(current_token))
|
||||
# Example: if the user has typed `download-file -url ...`, then `url`
|
||||
# is considered used and should not be suggested again (even as `--url`).
|
||||
|
||||
@@ -343,7 +343,7 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
|
||||
|
||||
# Fast gate: only for streaming URLs yt-dlp knows about.
|
||||
try:
|
||||
from SYS.download import is_url_supported_by_ytdlp # noqa: WPS433
|
||||
from tool.ytdlp import is_url_supported_by_ytdlp # noqa: WPS433
|
||||
|
||||
if not is_url_supported_by_ytdlp(url):
|
||||
return {
|
||||
|
||||
@@ -6,20 +6,22 @@ osd-bar=no
|
||||
border=no
|
||||
|
||||
# Keep the window size stable when loading files (don't resize to match aspect).
|
||||
keepaspect-window=no
|
||||
|
||||
# Ensure uosc texture/icon fonts are discoverable by libass.
|
||||
osd-fonts-dir=~~/scripts/uosc/fonts
|
||||
sub-fonts-dir=~~/scripts/uosc/
|
||||
|
||||
auto-window-resize=no
|
||||
|
||||
ontop=yes
|
||||
autofit=100%
|
||||
|
||||
save-position-on-quit=yes
|
||||
|
||||
# Avoid showing embedded cover art for audio-only files.
|
||||
audio-display=no
|
||||
# Stretch the video to fill the window (ignore aspect ratio, may distort)
|
||||
keepaspect=no
|
||||
video-unscaled=no
|
||||
cursor-autohide=1000
|
||||
|
||||
# gpu-next can be fragile on some Windows/D3D11 setups; prefer the stable VO.
|
||||
vo=gpu
|
||||
@@ -34,7 +36,7 @@ background=none
|
||||
background-color=0/0
|
||||
|
||||
# Without transparency, these options may be useful:
|
||||
background-color=.2 # don't use pure black
|
||||
# background-color=.2 # don't use pure black (disabled to keep video background transparent)
|
||||
force-window-position # recenter the window when changing playlist position on X11 and macOS
|
||||
auto-window-resize=no # preserve the window size when changing playlist entry
|
||||
|
||||
@@ -79,11 +81,3 @@ reset-on-next-file-remove=video-zoom # preserve the zoom when changing file
|
||||
reset-on-next-file-remove=panscan
|
||||
reset-on-next-file-remove=video-unscaled
|
||||
linear-downscaling=no # don't make black and white manga brighter
|
||||
|
||||
|
||||
git config --global user.name "Nose"
|
||||
git config --global user.email "goyimnose@nothing.blah"
|
||||
|
||||
ssh-keygen -t ed25519 -C "goyimnose@nothing.blah" -f $env:USERPROFILE\.ssh\id_ed25519
|
||||
|
||||
git remote set-url origin goyimnose@nothing.blah:OWNER/REPO.git
|
||||
@@ -182,7 +182,7 @@ time_precision=0
|
||||
# Display stream's buffered time in timeline if it's lower than this amount of seconds, 0 to disable
|
||||
buffered_time_threshold=60
|
||||
# Hide UI when mpv autohides the cursor. Timing is controlled by `cursor-autohide` in `mpv.conf` (in milliseconds).
|
||||
autohide=no
|
||||
autohide=yes
|
||||
# Can be: flash, static, manual (controlled by flash-pause-indicator and decide-pause-indicator commands)
|
||||
pause_indicator=flash
|
||||
# Sizes to list in stream quality menu
|
||||
|
||||
345
Provider/HIFI.py
345
Provider/HIFI.py
@@ -1,12 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import time
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from API.hifi import HifiApiClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import debug, log
|
||||
@@ -733,6 +736,10 @@ class HIFI(Provider):
|
||||
input_ref: str,
|
||||
output_path: Path,
|
||||
lossless_fallback: bool = True,
|
||||
progress: Optional[Any] = None,
|
||||
transfer_label: Optional[str] = None,
|
||||
duration_seconds: Optional[int] = None,
|
||||
audio_quality: Optional[str] = None,
|
||||
) -> Optional[Path]:
|
||||
ffmpeg_path = self._find_ffmpeg()
|
||||
if not ffmpeg_path:
|
||||
@@ -749,20 +756,115 @@ class HIFI(Provider):
|
||||
|
||||
protocol_whitelist = "file,https,http,tcp,tls,crypto,data"
|
||||
|
||||
def _run(cmd: List[str]) -> bool:
|
||||
label = str(transfer_label or output_path.name or "hifi")
|
||||
|
||||
def _estimate_total_bytes() -> Optional[int]:
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
dur = int(duration_seconds) if duration_seconds is not None else None
|
||||
except Exception:
|
||||
dur = None
|
||||
if not dur or dur <= 0:
|
||||
return None
|
||||
|
||||
qual = str(audio_quality or "").strip().lower()
|
||||
# Rough per-quality bitrate guess (bytes/sec).
|
||||
if qual in {"hi_res",
|
||||
"hi_res_lossless",
|
||||
"hires",
|
||||
"hi-res",
|
||||
"master",
|
||||
"mqa"}:
|
||||
bps = 4_608_000 # ~24-bit/96k stereo
|
||||
elif qual in {"lossless",
|
||||
"flac"}:
|
||||
bps = 1_411_200 # 16-bit/44.1k stereo
|
||||
else:
|
||||
bps = 320_000 # kbps for compressed
|
||||
|
||||
try:
|
||||
return int((bps / 8.0) * dur)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
est_total_bytes = _estimate_total_bytes()
|
||||
|
||||
def _update_transfer(total_bytes_val: Optional[int]) -> None:
|
||||
if progress is None:
|
||||
return
|
||||
try:
|
||||
progress.update_transfer(
|
||||
label=label,
|
||||
completed=int(total_bytes_val) if total_bytes_val is not None else None,
|
||||
total=est_total_bytes,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _run(cmd: List[str], *, target_path: Optional[Path] = None) -> bool:
|
||||
cmd_progress = list(cmd)
|
||||
# Enable ffmpeg progress output for live byte updates.
|
||||
cmd_progress.insert(1, "-progress")
|
||||
cmd_progress.insert(2, "pipe:1")
|
||||
cmd_progress.insert(3, "-nostats")
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
cmd_progress,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
if proc.returncode == 0 and self._has_nonempty_file(output_path):
|
||||
return True
|
||||
if proc.stderr:
|
||||
debug(f"[hifi] ffmpeg failed: {proc.stderr.strip()}")
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] ffmpeg invocation failed: {exc}")
|
||||
return False
|
||||
|
||||
last_bytes = None
|
||||
try:
|
||||
while True:
|
||||
line = proc.stdout.readline() if proc.stdout else ""
|
||||
if not line:
|
||||
if proc.poll() is not None:
|
||||
break
|
||||
time.sleep(0.05)
|
||||
continue
|
||||
|
||||
if "=" not in line:
|
||||
continue
|
||||
key, val = line.strip().split("=", 1)
|
||||
if key == "total_size":
|
||||
try:
|
||||
last_bytes = int(val)
|
||||
_update_transfer(last_bytes)
|
||||
except Exception:
|
||||
pass
|
||||
elif key == "out_time_ms":
|
||||
# Map out_time_ms to byte estimate when total_size missing.
|
||||
try:
|
||||
if est_total_bytes and val.isdigit():
|
||||
ms = int(val)
|
||||
dur_ms = (duration_seconds or 0) * 1000
|
||||
if dur_ms > 0:
|
||||
pct = min(1.0, max(0.0, ms / dur_ms))
|
||||
approx = int(est_total_bytes * pct)
|
||||
_update_transfer(approx)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
proc.wait()
|
||||
finally:
|
||||
if last_bytes is not None:
|
||||
_update_transfer(last_bytes)
|
||||
|
||||
check_path = target_path or output_path
|
||||
if proc.returncode == 0 and self._has_nonempty_file(check_path):
|
||||
return True
|
||||
|
||||
try:
|
||||
stderr_text = proc.stderr.read() if proc.stderr else ""
|
||||
if stderr_text:
|
||||
debug(f"[hifi] ffmpeg failed: {stderr_text.strip()}")
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
# Prefer remux (fast, no transcode).
|
||||
@@ -816,25 +918,14 @@ class HIFI(Provider):
|
||||
"flac",
|
||||
str(tmp_flac_path),
|
||||
]
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd_flac,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
if proc.returncode == 0 and self._has_nonempty_file(tmp_flac_path):
|
||||
if tmp_flac_path != flac_path:
|
||||
try:
|
||||
tmp_flac_path.replace(flac_path)
|
||||
except Exception:
|
||||
# If rename fails, still return the temp file.
|
||||
return tmp_flac_path
|
||||
return flac_path
|
||||
if proc.stderr:
|
||||
debug(f"[hifi] ffmpeg flac fallback failed: {proc.stderr.strip()}")
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] ffmpeg flac fallback invocation failed: {exc}")
|
||||
if _run(cmd_flac, target_path=tmp_flac_path) and self._has_nonempty_file(tmp_flac_path):
|
||||
if tmp_flac_path != flac_path:
|
||||
try:
|
||||
tmp_flac_path.replace(flac_path)
|
||||
except Exception:
|
||||
# If rename fails, still return the temp file.
|
||||
return tmp_flac_path
|
||||
return flac_path
|
||||
return None
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
@@ -921,7 +1012,14 @@ class HIFI(Provider):
|
||||
# If resolve_tidal_manifest_path returned a URL, prefer feeding it directly to ffmpeg.
|
||||
if resolved_text.lower().startswith("http"):
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=resolved_text,
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
if materialized is not None:
|
||||
return materialized
|
||||
|
||||
@@ -947,7 +1045,14 @@ class HIFI(Provider):
|
||||
if source_path.is_file() and source_path.suffix.lower() == ".mpd":
|
||||
# Materialize audio from the local MPD.
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=str(source_path), output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=str(source_path),
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
if materialized is not None:
|
||||
return materialized
|
||||
return None
|
||||
@@ -965,7 +1070,14 @@ class HIFI(Provider):
|
||||
|
||||
# As a last resort, attempt to treat the local path as an ffmpeg input.
|
||||
out_file = output_dir / f"{stem}{suffix}"
|
||||
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
|
||||
materialized = self._ffmpeg_demux_to_audio(
|
||||
input_ref=resolved_text,
|
||||
output_path=out_file,
|
||||
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
|
||||
transfer_label=title_part or getattr(result, "title", None),
|
||||
duration_seconds=self._coerce_duration_seconds(md),
|
||||
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
|
||||
)
|
||||
return materialized
|
||||
|
||||
def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]:
|
||||
@@ -1228,6 +1340,38 @@ class HIFI(Provider):
|
||||
minutes, secs = divmod(total, 60)
|
||||
return f"{minutes}:{secs:02d}"
|
||||
|
||||
@staticmethod
|
||||
def _coerce_duration_seconds(value: Any) -> Optional[int]:
|
||||
candidates = []
|
||||
candidates.append(value)
|
||||
try:
|
||||
if isinstance(value, dict):
|
||||
for key in ("duration",
|
||||
"durationSeconds",
|
||||
"duration_sec",
|
||||
"duration_ms",
|
||||
"durationMillis"):
|
||||
if key in value:
|
||||
candidates.append(value.get(key))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for cand in candidates:
|
||||
try:
|
||||
if cand is None:
|
||||
continue
|
||||
if isinstance(cand, str) and cand.strip().endswith("ms"):
|
||||
cand = cand.strip()[:-2]
|
||||
v = float(cand)
|
||||
if v <= 0:
|
||||
continue
|
||||
if v > 10_000: # treat as milliseconds
|
||||
v = v / 1000.0
|
||||
return int(round(v))
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _stringify(value: Any) -> str:
|
||||
text = str(value or "").strip()
|
||||
@@ -1305,23 +1449,18 @@ class HIFI(Provider):
|
||||
if audio_quality:
|
||||
columns.append(("Quality", audio_quality))
|
||||
|
||||
tags = {"tidal"}
|
||||
if audio_quality:
|
||||
tags.add(f"quality:{audio_quality.lower()}")
|
||||
metadata = item.get("mediaMetadata")
|
||||
if isinstance(metadata, dict):
|
||||
tag_values = metadata.get("tags") or []
|
||||
for tag in tag_values:
|
||||
if isinstance(tag, str) and tag.strip():
|
||||
tags.add(tag.strip().lower())
|
||||
|
||||
# IMPORTANT: do not retain a shared reference to the raw API dict.
|
||||
# Downstream playback (MPV) mutates metadata to cache the decoded Tidal
|
||||
# manifest path/URL. If multiple results share the same dict reference,
|
||||
# they can incorrectly collapse to a single playable target.
|
||||
full_md: Dict[str, Any] = dict(item)
|
||||
url_value = self._stringify(full_md.get("url"))
|
||||
if url_value:
|
||||
full_md["url"] = url_value
|
||||
|
||||
return SearchResult(
|
||||
tags = self._build_track_tags(full_md)
|
||||
|
||||
result = SearchResult(
|
||||
table="hifi",
|
||||
title=title,
|
||||
path=path,
|
||||
@@ -1332,6 +1471,12 @@ class HIFI(Provider):
|
||||
columns=columns,
|
||||
full_metadata=full_md,
|
||||
)
|
||||
if url_value:
|
||||
try:
|
||||
result.url = url_value
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _extract_track_selection_context(
|
||||
self, selected_items: List[Any]
|
||||
@@ -1401,6 +1546,9 @@ class HIFI(Provider):
|
||||
def _fetch_track_details(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
|
||||
info_data = self._fetch_track_info(track_id)
|
||||
|
||||
for base in self.api_urls:
|
||||
endpoint = f"{base.rstrip('/')}/track/"
|
||||
try:
|
||||
@@ -1408,12 +1556,32 @@ class HIFI(Provider):
|
||||
payload = client.track(track_id) if client else None
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
merged: Dict[str, Any] = {}
|
||||
if isinstance(info_data, dict):
|
||||
merged.update(info_data)
|
||||
merged.update(data)
|
||||
return merged
|
||||
except Exception as exc:
|
||||
log(f"[hifi] Track lookup failed for {endpoint}: {exc}", file=sys.stderr)
|
||||
continue
|
||||
return None
|
||||
|
||||
def _fetch_track_info(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
for base in self.api_urls:
|
||||
endpoint = f"{base.rstrip('/')}/info/"
|
||||
try:
|
||||
client = self._get_api_client_for_base(base)
|
||||
payload = client.info(track_id) if client else None
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except Exception as exc:
|
||||
debug(f"[hifi] Info lookup failed for {endpoint}: {exc}")
|
||||
continue
|
||||
return None
|
||||
|
||||
def _fetch_track_lyrics(self, track_id: int) -> Optional[Dict[str, Any]]:
|
||||
if track_id <= 0:
|
||||
return None
|
||||
@@ -1450,6 +1618,54 @@ class HIFI(Provider):
|
||||
]
|
||||
return [(name, value) for name, value in values if value]
|
||||
|
||||
def _build_track_tags(self, metadata: Dict[str, Any]) -> set[str]:
|
||||
tags: set[str] = {"tidal"}
|
||||
|
||||
audio_quality = self._stringify(metadata.get("audioQuality"))
|
||||
if audio_quality:
|
||||
tags.add(f"quality:{audio_quality.lower()}")
|
||||
|
||||
media_md = metadata.get("mediaMetadata")
|
||||
if isinstance(media_md, dict):
|
||||
tag_values = media_md.get("tags") or []
|
||||
for tag in tag_values:
|
||||
if isinstance(tag, str):
|
||||
candidate = tag.strip()
|
||||
if candidate:
|
||||
tags.add(candidate.lower())
|
||||
|
||||
title_text = self._stringify(metadata.get("title"))
|
||||
if title_text:
|
||||
tags.add(f"title:{title_text}")
|
||||
|
||||
artists = self._extract_artists(metadata)
|
||||
for artist in artists:
|
||||
artist_clean = self._stringify(artist)
|
||||
if artist_clean:
|
||||
tags.add(f"artist:{artist_clean}")
|
||||
|
||||
album_title = ""
|
||||
album_obj = metadata.get("album")
|
||||
if isinstance(album_obj, dict):
|
||||
album_title = self._stringify(album_obj.get("title"))
|
||||
else:
|
||||
album_title = self._stringify(metadata.get("album"))
|
||||
if album_title:
|
||||
tags.add(f"album:{album_title}")
|
||||
|
||||
track_no_val = metadata.get("trackNumber") or metadata.get("track_number")
|
||||
if track_no_val is not None:
|
||||
try:
|
||||
track_int = int(track_no_val)
|
||||
if track_int > 0:
|
||||
tags.add(f"track:{track_int}")
|
||||
except Exception:
|
||||
track_text = self._stringify(track_no_val)
|
||||
if track_text:
|
||||
tags.add(f"track:{track_text}")
|
||||
|
||||
return tags
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
@@ -1476,16 +1692,32 @@ class HIFI(Provider):
|
||||
else None
|
||||
)
|
||||
|
||||
try:
|
||||
debug(
|
||||
f"[hifi.selector] table_type={table_type} stage_is_last={stage_is_last} selected_count={len(selected_items) if selected_items else 0}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Artist selection: selecting @N should open an albums list.
|
||||
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.artist":
|
||||
contexts = self._extract_artist_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] artist contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
artist_id, artist_name = contexts[0]
|
||||
album_results = self._albums_for_artist(artist_id=artist_id, artist_name=artist_name, limit=200)
|
||||
if not album_results:
|
||||
return False
|
||||
try:
|
||||
from SYS.rich_display import stdout_console
|
||||
stdout_console().print(f"[bold yellow][hifi] No albums found for {artist_name}[/]")
|
||||
except Exception:
|
||||
log(f"[hifi] No albums found for {artist_name}")
|
||||
return True
|
||||
|
||||
try:
|
||||
from SYS.rich_display import stdout_console
|
||||
@@ -1531,6 +1763,10 @@ class HIFI(Provider):
|
||||
# Album selection: selecting @N should open the track list for that album.
|
||||
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.album":
|
||||
contexts = self._extract_album_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] album contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
@@ -1605,6 +1841,10 @@ class HIFI(Provider):
|
||||
return False
|
||||
|
||||
contexts = self._extract_track_selection_context(selected_items)
|
||||
try:
|
||||
debug(f"[hifi.selector] track contexts={len(contexts)}")
|
||||
except Exception:
|
||||
pass
|
||||
if not contexts:
|
||||
return False
|
||||
|
||||
@@ -1657,6 +1897,9 @@ class HIFI(Provider):
|
||||
insert_pos = 2 if artist_display else 1
|
||||
columns.insert(insert_pos, ("Album", album_title))
|
||||
|
||||
tags = self._build_track_tags(detail)
|
||||
url_value = self._stringify(detail.get("url"))
|
||||
|
||||
result = SearchResult(
|
||||
table="hifi",
|
||||
title=title,
|
||||
@@ -1666,7 +1909,13 @@ class HIFI(Provider):
|
||||
media_kind="audio",
|
||||
columns=columns,
|
||||
full_metadata=detail,
|
||||
tag=tags,
|
||||
)
|
||||
if url_value:
|
||||
try:
|
||||
result.url = url_value
|
||||
except Exception:
|
||||
pass
|
||||
table.add_result(result)
|
||||
try:
|
||||
results_payload.append(result.to_dict())
|
||||
|
||||
@@ -8,12 +8,11 @@ from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from API.HTTP import HTTPClient, _download_direct_file
|
||||
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from ProviderCore.download import sanitize_filename
|
||||
from SYS.download import _download_direct_file
|
||||
from SYS.logger import log
|
||||
from SYS.logger import log, debug
|
||||
from SYS.models import DownloadError
|
||||
|
||||
_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60
|
||||
@@ -302,7 +301,7 @@ def _dispatch_alldebrid_magnet_search(
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr)
|
||||
debug(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download")
|
||||
|
||||
|
||||
def prepare_magnet(
|
||||
|
||||
192
Provider/hello_provider.py
Normal file
192
Provider/hello_provider.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""Example provider template for use as a starter kit.
|
||||
|
||||
This minimal provider demonstrates the typical hooks a provider may implement:
|
||||
- `validate()` to assert it's usable
|
||||
- `search()` to return `SearchResult` items
|
||||
- `download()` to persist a sample file (useful for local tests)
|
||||
|
||||
See `docs/provider_guide.md` for authoring guidance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
|
||||
|
||||
class HelloProvider(Provider):
    """Minimal demonstration provider, intended as a copy-paste starting point.

    - Table name: `hello`
    - Usage: `search-file -provider hello "query"`
    - Selecting a row and piping into `download-file` will call `download()`.
    """

    URL = ("hello:",)
    URL_DOMAINS = ()

    def validate(self) -> bool:
        """Always usable: the demo provider needs no configuration."""
        return True

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Return up to three synthetic `SearchResult` rows for *query*.

        Wildcard-ish queries ("*", "all", "list", empty) are mapped to the
        literal term "example" so the demo always produces output.
        """
        term = (query or "").strip()
        if not term or term in {"*", "all", "list"}:
            term = "example"

        # Emit up to `limit` tiny example results, hard-capped at 3.
        count = min(max(1, int(limit)), 3)
        hits: List[SearchResult] = [
            SearchResult(
                table="hello",
                title=f"{term} sample {idx}",
                path=f"https://example.org/{term}/{idx}",
                detail="Example provider result",
                media_kind="file",
                columns=[("Example", "yes")],
                full_metadata={"example_index": idx},
            )
            for idx in range(1, count + 1)
        ]
        return hits[: max(0, int(limit))]

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
        """Write a small text file to simulate a download.

        Keeps the example self-contained (no network access required) so the
        provider download path can be exercised from `pytest`.

        Returns the written path, or ``None`` if the write failed.
        """
        try:
            Path(output_dir).mkdir(parents=True, exist_ok=True)
        except Exception:
            # Best-effort: the write below will surface a real failure.
            pass

        raw_title = str(getattr(result, "title", "hello") or "hello").strip()
        safe = "".join(
            ch if ch.isalnum() or ch in ("-", "_", ".") else "_" for ch in raw_title
        )
        dest = Path(output_dir) / (f"{safe}.txt" if safe else "hello.txt")
        try:
            dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
            return dest
        except Exception:
            return None

    def selector(
        self,
        selected_items: List[Any],
        *,
        ctx: Any,
        stage_is_last: bool = True,
        **_kwargs: Any,
    ) -> bool:
        """Show a details table when a HelloProvider row is selected via `@N`.

        Demonstrates the custom-selection hook: build a `ResultTable`, fill it
        with provider-specific rows, register it with the pipeline context, and
        print it. Returns ``True`` when the selection was handled here.
        """
        if not stage_is_last:
            return False

        def _coerce(item: Any) -> Dict[str, Any]:
            # Normalize a selected item (dict, SearchResult, or arbitrary
            # object) into a plain dict of the fields we care about.
            if isinstance(item, dict):
                return dict(item)
            try:
                if hasattr(item, "to_dict"):
                    converted = item.to_dict()
                    if isinstance(converted, dict):
                        return converted
            except Exception:
                pass
            try:
                return {
                    "title": getattr(item, "title", None),
                    "path": getattr(item, "path", None),
                    "table": getattr(item, "table", None),
                    "annotations": getattr(item, "annotations", None),
                    "media_kind": getattr(item, "media_kind", None),
                    "full_metadata": getattr(item, "full_metadata", None),
                }
            except Exception:
                return {}

        picked: List[Dict[str, Any]] = []
        for raw in selected_items or []:
            payload = _coerce(raw)
            meta = payload.get("full_metadata") or {}
            if not isinstance(meta, dict):
                meta = {}
            example_idx = meta.get("example_index")
            if example_idx is None:
                # Not one of our rows; ignore it.
                continue
            label = (
                str(payload.get("title") or payload.get("path") or "").strip()
                or f"hello-{example_idx}"
            )
            picked.append({"index": example_idx, "title": label, "path": payload.get("path")})

        if not picked:
            return False

        # Only the first matching selection drives the details view.
        target = picked[0]
        idx = target.get("index")
        title = target.get("title") or f"hello-{idx}"

        try:
            from SYS.result_table import ResultTable
            from SYS.rich_display import stdout_console
        except Exception:
            # If ResultTable isn't available, consider selection handled
            return True

        table = ResultTable(f"Hello Details: {title}").set_preserve_order(True)
        table.set_table("hello")
        try:
            table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
        except Exception:
            pass

        table.set_source_command("download-file", [])

        results_payload: List[Dict[str, Any]] = []
        for part in ("a", "b"):
            sr = SearchResult(
                table="hello",
                title=f"{title} - part {part}",
                path=f"{target.get('path')}/{part}",
                detail=f"Part {part}",
                media_kind="file",
                columns=[("Part", part)],
                full_metadata={"part": part, "example_index": idx},
            )
            table.add_result(sr)
            try:
                results_payload.append(sr.to_dict())
            except Exception:
                results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})

        try:
            ctx.set_last_result_table(table, results_payload)
            ctx.set_current_stage_table(table)
        except Exception:
            pass

        try:
            stdout_console().print()
            stdout_console().print(table)
        except Exception:
            pass

        return True
|
||||
@@ -1224,6 +1224,9 @@ class LibgenSearch:
|
||||
if results:
|
||||
_call(log_info, f"[libgen] Using mirror: {mirror}")
|
||||
return results
|
||||
else:
|
||||
_call(log_info, f"[libgen] Mirror returned 0 results; stopping mirror fallback")
|
||||
break
|
||||
except requests.exceptions.Timeout:
|
||||
_call(log_info, f"[libgen] Mirror timed out: {mirror}")
|
||||
continue
|
||||
|
||||
@@ -304,7 +304,7 @@ class PodcastIndex(Provider):
|
||||
pass
|
||||
|
||||
try:
|
||||
from SYS.download import _download_direct_file
|
||||
from API.HTTP import _download_direct_file
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
442
Provider/torrent.py
Normal file
442
Provider/torrent.py
Normal file
@@ -0,0 +1,442 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import debug, log
|
||||
try: # Preferred HTML parser
|
||||
from lxml import html as lxml_html
|
||||
except Exception: # pragma: no cover - optional
|
||||
lxml_html = None # type: ignore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class TorrentInfo:
    """Normalized record for one torrent scraped from any backend.

    `magnet`, `category`, and `uploader` are optional because not every
    source exposes them in its listing page.
    """

    name: str
    url: str
    seeders: int
    leechers: int
    size: str
    source: str
    category: Optional[str] = None
    uploader: Optional[str] = None
    magnet: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class SearchParams:
    """Query parameters shared by every scraper backend.

    `order_ascending` defaults to False (most-seeded first), matching the
    typical torrent-search expectation.
    """

    name: str
    category: Optional[str] = None
    order_column: Optional[str] = None
    order_ascending: bool = False
|
||||
|
||||
|
||||
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
|
||||
|
||||
|
||||
class Scraper:
    """Base class for per-site torrent scrapers.

    Subclasses override `_request_data()` (URL + query payload) and
    `_parse_search()` (response -> `TorrentInfo` list); optionally
    `_parse_detail_response()` to pull a magnet link from a detail page.
    All network failures are swallowed and logged via `debug()` so one broken
    mirror never aborts a multi-source search.
    """

    def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
        self.name = name
        # Normalize: never keep a trailing slash on the base URL.
        self.base = base_url.rstrip("/")
        self.timeout = timeout
        # Browser-like UA; some trackers reject obvious bot agents.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
            )
        }
        # Current search parameters; set by find() before page fetches.
        self.params: Optional[SearchParams] = None

    def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
        """Fetch up to *pages* result pages and concatenate their rows."""
        self.params = params
        collected: List[TorrentInfo] = []
        last_page = max(1, pages)
        for page_no in range(1, last_page + 1):
            try:
                collected.extend(self._get_page(page_no))
            except Exception as exc:
                debug(f"[{self.name}] page fetch failed: {exc}")
        return collected

    def _get_page(self, page: int) -> List[TorrentInfo]:
        """Fetch and parse a single results page; returns [] on any error."""
        url, payload = self._request_data(page)
        try:
            resp = requests.get(
                url,
                params=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            resp.raise_for_status()
            return self._parse_search(resp)
        except Exception as exc:
            debug(f"[{self.name}] request failed: {exc}")
            return []

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return (url, query-params) for *page*. Default: base URL, no params."""
        return self.base, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:  # pragma: no cover - interface
        raise NotImplementedError

    def _parse_detail(self, url: str) -> Optional[str]:  # optional override
        """Fetch a detail page and delegate magnet extraction to the subclass."""
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            resp.raise_for_status()
            return self._parse_detail_response(resp)
        except Exception:
            return None

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:  # pragma: no cover - interface
        return None

    @staticmethod
    def _int_from_text(value: Any) -> int:
        """Parse an int out of scraped text ("1,234" -> 1234); 0 on failure."""
        try:
            return int(str(value).strip().replace(",", ""))
        except Exception:
            return 0
|
||||
|
||||
|
||||
class NyaaScraper(Scraper):
    """Scraper for nyaa.si (server-rendered HTML table; requires lxml)."""

    def __init__(self) -> None:
        super().__init__("nyaa.si", "https://nyaa.si")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build nyaa's query string: p=page, q=query, c=category, f=filter."""
        search = self.params or SearchParams(name="")
        query: Dict[str, Any] = {
            "p": page,
            "q": search.name,
            "c": search.category or "0_0",
            "f": "0",
        }
        if search.order_column:
            query["s"] = search.order_column
            query["o"] = "asc" if search.order_ascending else "desc"
        return f"{self.base}/", query

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Parse the results table; each row carries name/links/size/seed cells."""
        if lxml_html is None:
            return []
        tree = lxml_html.fromstring(response.text)
        found: List[TorrentInfo] = []
        for row in tree.xpath("//table//tbody/tr"):
            cells = row.xpath("./td")
            if len(cells) < 7:
                continue
            category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells

            # Rows with a comment badge have two anchors; the torrent link is last.
            anchors = name_cell.xpath("./a")
            name_tag = anchors[1] if len(anchors) > 1 else (anchors[0] if anchors else None)
            if name_tag is None:
                continue

            title = name_tag.get("title") or (name_tag.text_content() or "").strip()
            href = name_tag.get("href") or ""

            magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
            magnet_link = magnet_candidates[0] if magnet_candidates else None

            cat_titles = category_cell.xpath(".//a/@title")
            category_title = cat_titles[0] if cat_titles else None

            found.append(
                TorrentInfo(
                    name=title,
                    url=f"{self.base}{href}",
                    seeders=self._int_from_text(seed_cell.text_content()),
                    leechers=self._int_from_text(leech_cell.text_content()),
                    size=(size_cell.text_content() or "").strip(),
                    source=self.name,
                    category=category_title,
                    magnet=magnet_link,
                )
            )
        return found
|
||||
|
||||
|
||||
class X1337Scraper(Scraper):
    """Scraper for 1337x.to (server-rendered HTML; requires lxml).

    Magnet links are not present in the listing page, so each result triggers
    one extra detail-page fetch via `_parse_detail()`.
    """

    def __init__(self) -> None:
        super().__init__("1337x.to", "https://1337x.to")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the path-style search URL 1337x uses (no query params)."""
        params = self.params or SearchParams(name="")
        order = None
        if params.order_column:
            direction = "asc" if params.order_ascending else "desc"
            order = f"{params.order_column}/{direction}"

        category = params.category
        name = requests.utils.quote(params.name)

        if order and category:
            path = f"/sort-category-search/{name}/{category}/{order}"
        elif category:
            path = f"/category-search/{name}/{category}"
        elif order:
            path = f"/sort-search/{name}/{order}"
        else:
            path = f"/search/{name}"

        url = f"{self.base}{path}/{page}/"
        return url, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Parse the results table into `TorrentInfo` rows."""
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 6:
                continue
            # BUGFIX: the original exact 6-way unpack raised ValueError on any
            # row with more than 6 <td> cells, which aborted the whole page
            # (the exception propagated to _get_page's except and returned []).
            # Use a starred unpack, consistent with NyaaScraper.
            name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell, *_ = cells

            links = name_cell.xpath(".//a")
            if len(links) < 2:
                continue

            torrent_path = links[1].get("href")
            torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""

            info = TorrentInfo(
                name=(links[1].text_content() or "").strip(),
                url=torrent_url,
                seeders=self._int_from_text(seeds_cell.text_content()),
                leechers=self._int_from_text(leech_cell.text_content()),
                size=(size_cell.text_content() or "").strip().replace(",", ""),
                source=self.name,
                uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
            )

            # Listing pages carry no magnet; fetch it from the detail page.
            # NOTE: this is one extra HTTP request per result row.
            if not info.magnet:
                info.magnet = self._parse_detail(info.url)
            results.append(info)
        return results

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
        """Extract the first magnet link from a torrent detail page."""
        if lxml_html is None:
            return None
        doc = lxml_html.fromstring(response.text)
        links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
        return links[0] if links else None
|
||||
|
||||
|
||||
class YTSScraper(Scraper):
    """Scraper for the yts.mx JSON API (movies only; no lxml needed).

    Magnet links are synthesized from the torrent hash plus a fixed tracker
    list, since the API only returns info hashes.
    """

    # Pre-joined tracker list appended to every synthesized magnet URI.
    TRACKERS = "&tr=".join(
        [
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        ]
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        # JSON API: no browser UA spoofing required.
        self.headers = {}

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build list_movies.json query; always sorted by seed count."""
        search = self.params or SearchParams(name="")
        query = {
            "limit": 50,
            "page": page,
            "query_term": search.name,
            "sort_by": "seeds",
            "order_by": "desc" if not search.order_ascending else "asc",
        }
        return f"{self.base}/list_movies.json", query

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Map the JSON movie list to TorrentInfo, keeping the best-seeded torrent per movie."""
        out: List[TorrentInfo] = []
        data = response.json()
        if data.get("status") != "ok":
            return out
        movies = (data.get("data") or {}).get("movies") or []
        for movie in movies:
            torrents = movie.get("torrents") or []
            if not torrents:
                continue
            # Each movie lists several quality variants; pick the most seeded.
            best = max(torrents, key=lambda t: t.get("seeds", 0))
            title = movie.get("title") or "unknown"
            out.append(
                TorrentInfo(
                    name=title,
                    url=str(movie.get("id") or ""),
                    seeders=int(best.get("seeds", 0) or 0),
                    leechers=int(best.get("peers", 0) or 0),
                    size=str(best.get("size") or ""),
                    source=self.name,
                    category=(movie.get("genres") or [None])[0],
                    magnet=self._build_magnet(best, title),
                )
            )
        return out

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        """Assemble a magnet URI from the API's info hash and our tracker list."""
        return (
            f"magnet:?xt=urn:btih:{torrent.get('hash')}"
            f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
        )
|
||||
|
||||
|
||||
class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone).

    JSON API, single page, no lxml dependency. Sizes arrive as raw byte
    counts and are formatted into MB/GB for display.
    """

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """apibay's q.php takes only the query string; paging is unsupported."""
        _ = page  # single-page API
        params = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": params.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Map the JSON array of result dicts to `TorrentInfo` rows."""
        results: List[TorrentInfo] = []
        try:
            data = response.json()
        except Exception:
            return results
        if not isinstance(data, list):
            return results

        for item in data:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            info_hash = str(item.get("info_hash") or "").strip()
            if not name or not info_hash:
                continue
            # BUGFIX: apibay signals "no matches" with a single placeholder row
            # ("No results returned") whose info_hash is all zeros. Previously
            # that row passed the filter and surfaced as a bogus,
            # undownloadable result; skip any zeroed hash.
            if set(info_hash) <= {"0"}:
                continue

            magnet = self._build_magnet(info_hash, name)
            seeders = self._int_from_text(item.get("seeders"))
            leechers = self._int_from_text(item.get("leechers"))
            size_raw = str(item.get("size") or "").strip()
            size_fmt = self._format_size(size_raw)

            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}/description.php?id={item.get('id')}",
                    seeders=seeders,
                    leechers=leechers,
                    size=size_fmt,
                    source=self.name,
                    category=str(item.get("category") or ""),
                    uploader=str(item.get("username") or ""),
                    magnet=magnet,
                )
            )
        return results

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        """Trackerless magnet URI; clients resolve peers via DHT."""
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a raw byte count as "X.Y GB"/"X.Y MB"; pass through on failure."""
        try:
            size_int = int(size_raw)
            if size_int <= 0:
                return size_raw
            gb = size_int / (1024 ** 3)
            if gb >= 1:
                return f"{gb:.1f} GB"
            mb = size_int / (1024 ** 2)
            return f"{mb:.1f} MB"
        except Exception:
            return size_raw
|
||||
|
||||
|
||||
class Torrent(Provider):
    """Meta-provider that fans a query out over several torrent scrapers and
    merges the results, sorted by seeder count.

    HTML scrapers (nyaa/1337x) are only registered when lxml is importable;
    the JSON-API scrapers (apibay/yts) always run.
    """

    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.scrapers: List[Scraper] = []
        # JSON APIs (no lxml dependency)
        self.scrapers.append(ApiBayScraper())
        self.scrapers.append(YTSScraper())
        # HTML scrapers require lxml
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        """Usable as long as at least one scraper backend was registered."""
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        """Query every scraper, merge, sort by seeders, and convert to SearchResult."""
        term = str(query or "").strip()
        if not term:
            return []

        params = SearchParams(name=term, order_column="seeders", order_ascending=False)
        merged: List[TorrentInfo] = []

        for backend in self.scrapers:
            try:
                merged.extend(backend.find(params, pages=1))
            except Exception as exc:
                # A failing backend must never abort the combined search.
                debug(f"[torrent] scraper {backend.name} failed: {exc}")
                continue

        merged.sort(key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            merged = merged[:limit]

        converted: List[SearchResult] = []
        for item in merged:
            # Prefer the magnet as the row path; fall back to the page URL.
            row_path = item.magnet or item.url
            columns = [
                ("TITLE", item.name),
                ("Seeds", str(item.seeders)),
                ("Leechers", str(item.leechers)),
                ("Size", item.size or ""),
                ("Source", item.source),
            ]
            if item.uploader:
                columns.append(("Uploader", item.uploader))

            metadata = {
                "magnet": item.magnet,
                "url": item.url,
                "source": item.source,
                "seeders": item.seeders,
                "leechers": item.leechers,
                "size": item.size,
            }
            if item.uploader:
                metadata["uploader"] = item.uploader

            converted.append(
                SearchResult(
                    table="torrent",
                    title=item.name,
                    path=row_path,
                    detail=f"Seeds:{item.seeders} | Size:{item.size}",
                    annotations=[item.source],
                    media_kind="other",
                    columns=columns,
                    full_metadata=metadata,
                    tag={"torrent"},
                )
            )
        return converted
|
||||
185
Provider/vimm.py
Normal file
185
Provider/vimm.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""Vimm provider skeleton (lxml + HTTPClient).
|
||||
|
||||
This is a lightweight, resilient provider implementation intended as a
|
||||
starting point for implementing a full Vimm (vimm.net) provider.
|
||||
|
||||
It prefers server-rendered HTML parsing via lxml and uses the repo's
|
||||
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
|
||||
|
||||
Selectors in `search()` are intentionally permissive heuristics; update the
|
||||
XPaths to match the real site HTML when you have an actual fixture.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urljoin, quote_plus
|
||||
from lxml import html as lxml_html
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
|
||||
|
||||
class Vimm(Provider):
    """Provider skeleton for vimm.net vault listings.

    - Uses lxml to parse server-rendered HTML
    - No authentication required
    - XPaths are permissive heuristics; tune them against real fixtures.
    """

    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    def validate(self) -> bool:
        # This provider has no required config; consider more checks if needed.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Convert a human size string ("1.5 GB", "700MB") to bytes; None if unparseable."""
        if not size_str:
            return None
        try:
            cleaned = str(size_str or "").strip().replace(",", "")
            match = re.search(r"(?P<val>[\d\.]+)\s*(?P<unit>[KMGT]?B)?", cleaned, flags=re.I)
            if not match:
                return None
            magnitude = float(match.group("val"))
            unit = (match.group("unit") or "B").upper()
            factor = {
                "B": 1,
                "KB": 1024,
                "MB": 1024 ** 2,
                "GB": 1024 ** 3,
                "TB": 1024 ** 4,
            }.get(unit, 1)
            return int(magnitude * factor)
        except Exception:
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Fetch the vault list page for *query* and heuristically scrape rows.

        Returns [] on any network/parse failure (errors are logged, not raised).
        """
        q = (query or "").strip()
        if not q:
            return []

        # Build search/list URL
        base = "https://vimm.net/vault/"
        url = f"{base}?p=list&q={quote_plus(q)}"

        try:
            with HTTPClient(timeout=20.0) as client:
                resp = client.get(url)
                content = resp.content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []

        try:
            doc = lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []

        results: List[SearchResult] = []

        # Candidate XPaths for list items (tweak to match real DOM)
        container_xpaths = [
            '//div[contains(@class,"list-item")]',
            '//div[contains(@class,"result")]',
            '//li[contains(@class,"item")]',
            '//tr[contains(@class,"result")]',
            '//article',
        ]

        nodes = []
        for xp in container_xpaths:
            try:
                found = doc.xpath(xp)
            except Exception:
                continue
            if found:
                nodes = found
                debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
                break

        # Fallback: try generic anchors under a list area
        if not nodes:
            try:
                nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div')
            except Exception:
                nodes = []

        for node in (nodes or [])[: max(1, int(limit))]:
            try:
                # Prefer explicit title anchors
                title: Optional[str] = None
                href: Optional[str] = None
                try:
                    # a few heuristic searches for a meaningful anchor
                    anchors = (node.xpath('.//a[contains(@class,"title")]') or
                               node.xpath('.//h2/a') or
                               node.xpath('.//a[contains(@href,"/vault/")]') or
                               node.xpath('.//a'))
                    if anchors:
                        first = anchors[0]
                        title = first.text_content().strip()
                        href = first.get('href')
                except Exception:
                    title = None
                    href = None

                if not title:
                    title = (node.text_content() or "").strip()

                path = urljoin(base, href) if href else ""

                # Extract size & platform heuristics
                size_text = ""
                try:
                    size_hits = node.xpath('.//*[contains(@class,"size")]/text()') or node.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()')
                    if size_hits:
                        size_text = str(size_hits[0]).strip()
                except Exception:
                    size_text = ""

                size_bytes = self._parse_size_bytes(size_text)

                platform = ""
                try:
                    platform_hits = node.xpath('.//*[contains(@class,"platform")]/text()')
                    if platform_hits:
                        platform = str(platform_hits[0]).strip()
                except Exception:
                    platform = ""

                columns = []
                if platform:
                    columns.append(("Platform", platform))
                if size_text:
                    columns.append(("Size", size_text))

                results.append(
                    SearchResult(
                        table="vimm",
                        title=str(title or "").strip(),
                        path=str(path or ""),
                        detail="",
                        annotations=[],
                        media_kind="file",
                        size_bytes=size_bytes,
                        tag={"vimm"},
                        columns=columns,
                        # Keep the raw node HTML for later, more precise parsing.
                        full_metadata={"raw": lxml_html.tostring(node, encoding="unicode")},
                    )
                )
            except Exception:
                continue

        return results[: max(0, int(limit))]
|
||||
@@ -26,8 +26,7 @@ class SearchResult:
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for pipeline processing."""
|
||||
|
||||
return {
|
||||
out = {
|
||||
"table": self.table,
|
||||
"title": self.title,
|
||||
"path": self.path,
|
||||
@@ -40,6 +39,15 @@ class SearchResult:
|
||||
"full_metadata": self.full_metadata,
|
||||
}
|
||||
|
||||
try:
|
||||
url_value = getattr(self, "url", None)
|
||||
if url_value is not None:
|
||||
out["url"] = url_value
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class Provider(ABC):
|
||||
"""Unified provider base class.
|
||||
|
||||
@@ -1,75 +1,238 @@
|
||||
"""Provider registry.
|
||||
|
||||
Concrete provider implementations live in the `Provider/` package.
|
||||
This module is the single source of truth for provider discovery.
|
||||
Concrete provider implementations live in the ``Provider`` package. This module
|
||||
is the single source of truth for discovery, metadata, and lifecycle helpers
|
||||
for those plugins.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence, Type
|
||||
import importlib
|
||||
import pkgutil
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from ProviderCore.base import Provider, SearchProvider, FileProvider, SearchResult
|
||||
from Provider.alldebrid import AllDebrid
|
||||
from Provider.bandcamp import Bandcamp
|
||||
from Provider.libgen import Libgen
|
||||
from Provider.matrix import Matrix
|
||||
from Provider.openlibrary import OpenLibrary
|
||||
from Provider.soulseek import Soulseek, download_soulseek_file
|
||||
from Provider.telegram import Telegram
|
||||
from Provider.youtube import YouTube
|
||||
from Provider.fileio import FileIO
|
||||
from Provider.zeroxzero import ZeroXZero
|
||||
from Provider.loc import LOC
|
||||
from Provider.internetarchive import InternetArchive
|
||||
from Provider.podcastindex import PodcastIndex
|
||||
from Provider.HIFI import HIFI
|
||||
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
|
||||
from Provider.soulseek import download_soulseek_file
|
||||
|
||||
_PROVIDERS: Dict[str,
|
||||
Type[Provider]] = {
|
||||
# Search-capable providers
|
||||
"alldebrid": AllDebrid,
|
||||
"libgen": Libgen,
|
||||
"openlibrary": OpenLibrary,
|
||||
"internetarchive": InternetArchive,
|
||||
"hifi": HIFI,
|
||||
"soulseek": Soulseek,
|
||||
"bandcamp": Bandcamp,
|
||||
"youtube": YouTube,
|
||||
"telegram": Telegram,
|
||||
"loc": LOC,
|
||||
"podcastindex": PodcastIndex,
|
||||
# Upload-capable providers
|
||||
"0x0": ZeroXZero,
|
||||
"file.io": FileIO,
|
||||
"matrix": Matrix,
|
||||
}
|
||||
|
||||
@dataclass(frozen=True)
class ProviderInfo:
    """Immutable metadata about one registered provider."""

    # Normalized (lowercase) primary name used for lookups.
    canonical_name: str
    provider_class: Type[Provider]
    # Dotted module path the class was discovered in.
    module: str
    # Normalized alternate lookup names.
    alias_names: Tuple[str, ...] = field(default_factory=tuple)

    @property
    def supports_search(self) -> bool:
        # True when the class overrides the base Provider.search hook.
        return self.provider_class.search is not Provider.search

    @property
    def supports_upload(self) -> bool:
        # True when the class overrides the base Provider.upload hook.
        return self.provider_class.upload is not Provider.upload
|
||||
|
||||
|
||||
class ProviderRegistry:
|
||||
"""Handles discovery, registration, and lookup of provider classes."""
|
||||
|
||||
def __init__(self, package_name: str) -> None:
|
||||
self.package_name = (package_name or "").strip()
|
||||
self._infos: Dict[str, ProviderInfo] = {}
|
||||
self._lookup: Dict[str, ProviderInfo] = {}
|
||||
self._modules: set[str] = set()
|
||||
self._discovered = False
|
||||
|
||||
def _normalize(self, value: Any) -> str:
|
||||
return str(value or "").strip().lower()
|
||||
|
||||
def _candidate_names(self,
|
||||
provider_class: Type[Provider],
|
||||
override_name: Optional[str]) -> List[str]:
|
||||
names: List[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add(value: Any) -> None:
|
||||
text = str(value or "").strip()
|
||||
normalized = text.lower()
|
||||
if not text or normalized in seen:
|
||||
return
|
||||
seen.add(normalized)
|
||||
names.append(text)
|
||||
|
||||
if override_name:
|
||||
_add(override_name)
|
||||
else:
|
||||
_add(getattr(provider_class, "PROVIDER_NAME", None))
|
||||
_add(getattr(provider_class, "NAME", None))
|
||||
|
||||
_add(getattr(provider_class, "__name__", None))
|
||||
|
||||
for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
|
||||
_add(alias)
|
||||
|
||||
return names
|
||||
|
||||
def register(
|
||||
self,
|
||||
provider_class: Type[Provider],
|
||||
*,
|
||||
override_name: Optional[str] = None,
|
||||
extra_aliases: Optional[Sequence[str]] = None,
|
||||
module_name: Optional[str] = None,
|
||||
replace: bool = False,
|
||||
) -> ProviderInfo:
|
||||
"""Register a provider class with canonical and alias names."""
|
||||
|
||||
candidates = self._candidate_names(provider_class, override_name)
|
||||
if not candidates:
|
||||
raise ValueError("provider name candidates are required")
|
||||
|
||||
canonical = self._normalize(candidates[0])
|
||||
if not canonical:
|
||||
raise ValueError("provider name must not be empty")
|
||||
|
||||
alias_names: List[str] = []
|
||||
alias_seen: set[str] = set()
|
||||
|
||||
for candidate in candidates[1:]:
|
||||
normalized = self._normalize(candidate)
|
||||
if not normalized or normalized == canonical or normalized in alias_seen:
|
||||
continue
|
||||
alias_seen.add(normalized)
|
||||
alias_names.append(normalized)
|
||||
|
||||
for alias in extra_aliases or ():
|
||||
normalized = self._normalize(alias)
|
||||
if not normalized or normalized == canonical or normalized in alias_seen:
|
||||
continue
|
||||
alias_seen.add(normalized)
|
||||
alias_names.append(normalized)
|
||||
|
||||
info = ProviderInfo(
|
||||
canonical_name=canonical,
|
||||
provider_class=provider_class,
|
||||
module=module_name or getattr(provider_class, "__module__", "") or "",
|
||||
alias_names=tuple(alias_names),
|
||||
)
|
||||
|
||||
existing = self._infos.get(canonical)
|
||||
if existing is not None and not replace:
|
||||
return existing
|
||||
|
||||
self._infos[canonical] = info
|
||||
for lookup in (canonical,) + tuple(alias_names):
|
||||
self._lookup[lookup] = info
|
||||
return info
|
||||
|
||||
def _register_module(self, module: ModuleType) -> None:
|
||||
module_name = getattr(module, "__name__", "")
|
||||
if not module_name or module_name in self._modules:
|
||||
return
|
||||
self._modules.add(module_name)
|
||||
|
||||
for attr in dir(module):
|
||||
candidate = getattr(module, attr)
|
||||
if not isinstance(candidate, type):
|
||||
continue
|
||||
if not issubclass(candidate, Provider):
|
||||
continue
|
||||
if candidate in {Provider, SearchProvider, FileProvider}:
|
||||
continue
|
||||
if getattr(candidate, "__module__", "") != module_name:
|
||||
continue
|
||||
try:
|
||||
self.register(candidate, module_name=module_name)
|
||||
except Exception as exc:
|
||||
log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)
|
||||
|
||||
def discover(self) -> None:
    """Import and register providers from the package."""
    if self._discovered or not self.package_name:
        return
    self._discovered = True  # set first so a failing import is not retried

    try:
        root = importlib.import_module(self.package_name)
    except Exception as exc:
        log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
        return

    self._register_module(root)

    search_path = getattr(root, "__path__", None)
    if not search_path:
        return  # plain module, not a package: nothing more to iterate

    for _finder, sub_name, _is_pkg in pkgutil.iter_modules(search_path):
        if sub_name.startswith("_"):
            continue  # private modules are skipped by convention
        dotted = f"{self.package_name}.{sub_name}"
        try:
            submodule = importlib.import_module(dotted)
        except Exception as exc:
            log(f"[provider] Failed to load {dotted}: {exc}", file=sys.stderr)
            continue
        self._register_module(submodule)
|
||||
|
||||
def get(self, name: str) -> Optional[ProviderInfo]:
    """Resolve *name* (canonical or alias) to a ProviderInfo, or None."""
    self.discover()
    if name:
        return self._lookup.get(self._normalize(name))
    return None
|
||||
|
||||
def iter_providers(self) -> Iterable[ProviderInfo]:
    """Return an immutable snapshot of every registered provider info."""
    self.discover()
    snapshot = tuple(self._infos.values())
    return snapshot
|
||||
|
||||
def has_name(self, name: str) -> bool:
    """True when *name* resolves to a registered provider or alias."""
    resolved = self.get(name)
    return resolved is not None
|
||||
|
||||
|
||||
# Module-level singleton registry; discovery runs eagerly at import time so
# lookups below never race a partially-populated registry.
REGISTRY = ProviderRegistry("Provider")
REGISTRY.discover()
|
||||
|
||||
|
||||
def register_provider(
    provider_class: Type[Provider],
    *,
    name: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    module_name: Optional[str] = None,
    replace: bool = False,
) -> ProviderInfo:
    """Register a provider class from tests or third-party packages."""
    # Thin shim over the module-level registry; the public keyword names map
    # onto the registry's override/extra spellings.
    info = REGISTRY.register(
        provider_class,
        override_name=name,
        extra_aliases=aliases,
        module_name=module_name,
        replace=replace,
    )
    return info
|
||||
|
||||
|
||||
def get_provider_class(name: str) -> Optional[Type[Provider]]:
    """Return the provider class for a registered provider name, if any.

    Looks the name up (canonical or alias) in the unified REGISTRY; the
    legacy `_PROVIDERS` dict lookup was dead merge residue and is removed —
    it made the registry path unreachable.
    """
    info = REGISTRY.get(name)
    if info is None:
        return None
    return info.provider_class
|
||||
|
||||
|
||||
def selection_auto_stage_for_table(
|
||||
table_type: str,
|
||||
stage_args: Optional[Sequence[str]] = None,
|
||||
) -> Optional[list[str]]:
|
||||
"""Return the provider-suggested stage to auto-run for a selected table.
|
||||
|
||||
This is used by the CLI to avoid hardcoding table names and behaviors.
|
||||
"""
|
||||
t = str(table_type or "").strip().lower()
|
||||
if not t:
|
||||
return None
|
||||
|
||||
# Provider tables are usually either:
|
||||
# - "youtube" (no dot)
|
||||
# - "hifi.tracks" (prefix = provider name)
|
||||
provider_key = t.split(".", 1)[0] if "." in t else t
|
||||
provider_class = get_provider_class(provider_key) or get_provider_class(t)
|
||||
if provider_class is None:
|
||||
@@ -82,14 +245,7 @@ def selection_auto_stage_for_table(
|
||||
|
||||
|
||||
def is_known_provider_name(name: str) -> bool:
    """Return True if `name` matches a registered provider key.

    This is intentionally cheap (no imports/instantiation) so callers can
    probe UI strings (table names, store names, etc.) without triggering
    noisy 'Unknown provider' logs.
    """
    # The stale `_PROVIDERS` membership test (pre-REGISTRY residue) shadowed
    # this call and ignored aliases; the registry lookup is authoritative.
    return REGISTRY.has_name(name)
|
||||
|
||||
|
||||
def _supports_search(provider: Provider) -> bool:
|
||||
@@ -107,18 +263,14 @@ def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
|
||||
return []
|
||||
|
||||
|
||||
def get_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[Provider]:
|
||||
"""Get a provider by name (unified registry)."""
|
||||
|
||||
provider_class = _PROVIDERS.get((name or "").lower())
|
||||
if provider_class is None:
|
||||
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
|
||||
info = REGISTRY.get(name)
|
||||
if info is None:
|
||||
log(f"[provider] Unknown provider: {name}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
provider = info.provider_class(config)
|
||||
if not provider.validate():
|
||||
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
|
||||
return None
|
||||
@@ -129,24 +281,18 @@ def get_provider(name: str,
|
||||
|
||||
|
||||
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all providers and their availability.

    Instantiates each registered provider with *config* and records whether
    `validate()` succeeds; construction/validation errors count as False.
    The interleaved legacy `_PROVIDERS` loop (merge residue) is removed.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = provider.validate()
        except Exception:
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def get_search_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[SearchProvider]:
|
||||
"""Get a search-capable provider by name (compat API)."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
|
||||
provider = get_provider(name, config)
|
||||
if provider is None:
|
||||
return None
|
||||
@@ -157,26 +303,20 @@ def get_search_provider(name: str,
|
||||
|
||||
|
||||
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all search providers and their availability.

    A provider counts as available only when it both validates and declares
    search support (`info.supports_search`). Errors map to False. The
    duplicated pre-REGISTRY loop lines (merge residue) are removed.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = bool(
                provider.validate() and info.supports_search
            )
        except Exception:
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def get_file_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[FileProvider]:
|
||||
"""Get an upload-capable provider by name (compat API)."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
|
||||
provider = get_provider(name, config)
|
||||
if provider is None:
|
||||
return None
|
||||
@@ -187,28 +327,19 @@ def get_file_provider(name: str,
|
||||
|
||||
|
||||
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all file providers and their availability.

    A provider counts as available only when it both validates and declares
    upload support (`info.supports_upload`). Errors map to False. The
    duplicated pre-REGISTRY loop lines (merge residue) are removed.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = bool(
                provider.validate() and info.supports_upload
            )
        except Exception:
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
"""Return a registered provider name that claims the URL's domain.
|
||||
|
||||
Providers can declare domains via class attribute `URL` (preferred) or `URL_DOMAINS`.
|
||||
This matcher is intentionally cheap (no provider instantiation, no network).
|
||||
"""
|
||||
|
||||
raw_url = str(url or "").strip()
|
||||
raw_url_lower = raw_url.lower()
|
||||
try:
|
||||
@@ -219,11 +350,6 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
host = ""
|
||||
path = ""
|
||||
|
||||
# Prefer Internet Archive for archive.org links unless the URL clearly refers
|
||||
# to a borrow/loan flow (handled by OpenLibrary provider).
|
||||
#
|
||||
# This keeps direct downloads and item pages routed to `internetarchive`, while
|
||||
# preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
|
||||
def _norm_host(h: str) -> str:
|
||||
h_norm = str(h or "").strip().lower()
|
||||
if h_norm.startswith("www."):
|
||||
@@ -234,47 +360,45 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
|
||||
if host_norm:
|
||||
if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
|
||||
|
||||
if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
|
||||
low_path = str(path or "").lower()
|
||||
is_borrowish = (
|
||||
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/") or "/services/loans/" in low_path
|
||||
low_path.startswith("/borrow/")
|
||||
or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/")
|
||||
or "/services/loans/" in low_path
|
||||
)
|
||||
if is_borrowish:
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "internetarchive" if "internetarchive" in _PROVIDERS else None
|
||||
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
|
||||
return "internetarchive" if REGISTRY.has_name("internetarchive") else None
|
||||
|
||||
for name, provider_class in _PROVIDERS.items():
|
||||
domains = _provider_url_patterns(provider_class)
|
||||
for info in REGISTRY.iter_providers():
|
||||
domains = _provider_url_patterns(info.provider_class)
|
||||
if not domains:
|
||||
continue
|
||||
for d in domains:
|
||||
dom_raw = str(d or "").strip()
|
||||
for domain in domains:
|
||||
dom_raw = str(domain or "").strip()
|
||||
dom = dom_raw.lower()
|
||||
if not dom:
|
||||
continue
|
||||
# Scheme-like patterns (magnet:, http://example) still use prefix match.
|
||||
if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
|
||||
if raw_url_lower.startswith(dom):
|
||||
return name
|
||||
return info.canonical_name
|
||||
continue
|
||||
|
||||
dom_norm = _norm_host(dom)
|
||||
if not dom_norm or not host_norm:
|
||||
continue
|
||||
if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
|
||||
return name
|
||||
return info.canonical_name
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_provider_for_url(url: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[Provider]:
|
||||
"""Instantiate and return the matching provider for a URL, if any."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
|
||||
name = match_provider_name_for_url(url)
|
||||
if not name:
|
||||
return None
|
||||
@@ -282,10 +406,12 @@ def get_provider_for_url(url: str,
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SearchResult",
|
||||
"ProviderInfo",
|
||||
"Provider",
|
||||
"SearchProvider",
|
||||
"FileProvider",
|
||||
"SearchResult",
|
||||
"register_provider",
|
||||
"get_provider",
|
||||
"list_providers",
|
||||
"get_search_provider",
|
||||
@@ -294,7 +420,7 @@ __all__ = [
|
||||
"list_file_providers",
|
||||
"match_provider_name_for_url",
|
||||
"get_provider_for_url",
|
||||
"download_soulseek_file",
|
||||
"get_provider_class",
|
||||
"selection_auto_stage_for_table",
|
||||
"download_soulseek_file",
|
||||
]
|
||||
|
||||
1116
SYS/download.py
1116
SYS/download.py
File diff suppressed because it is too large
Load Diff
1819
SYS/metadata.py
1819
SYS/metadata.py
File diff suppressed because it is too large
Load Diff
234
SYS/tasks.py
234
SYS/tasks.py
@@ -1,234 +0,0 @@
|
||||
"""Background task handling and IPC helpers for mpv integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
import errno
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
import threading
|
||||
import time
|
||||
from typing import IO, Iterable
|
||||
|
||||
|
||||
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
    """Connect to the mpv IPC server located at *path*.

    Retries in 50 ms steps until *timeout* seconds elapse. Returns an
    unbuffered binary file object on success, or None when the endpoint
    never becomes reachable. On Windows *path* is a named pipe; elsewhere
    it is a UNIX domain socket.
    """
    deadline = time.time() + timeout
    if not path:
        return None
    if os.name == "nt":
        # mpv exposes a named pipe on Windows. Keep retrying until it is ready.
        while True:
            try:
                # buffering=0: callers frame messages themselves (newline-delimited JSON).
                return open(path, "r+b", buffering=0)
            except FileNotFoundError:
                # Pipe not created yet — retry until the deadline.
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
            except OSError as exc:  # Pipe busy
                # Windows named pipes can intermittently raise EINVAL while the pipe exists
                # but is not ready/accepting connections yet.
                if exc.errno not in (errno.ENOENT,
                                     errno.EPIPE,
                                     errno.EBUSY,
                                     errno.EINVAL):
                    raise
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
    else:
        # POSIX: mpv's IPC endpoint is a UNIX socket; wrap it in a file object
        # so both platforms hand back the same read/write interface.
        sock = socket.socket(socket.AF_UNIX)
        while True:
            try:
                sock.connect(path)
                return sock.makefile("r+b", buffering=0)
            except FileNotFoundError:
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
            except OSError as exc:
                # ENOENT/ECONNREFUSED mean the server is not up yet; anything
                # else is a real error and propagates.
                if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
                    raise
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
|
||||
|
||||
|
||||
def ipc_sender(ipc: IO[bytes] | None):
|
||||
"""Create a helper function for sending script messages via IPC."""
|
||||
if ipc is None:
|
||||
|
||||
def _noop(_event: str, _payload: dict) -> None:
|
||||
return None
|
||||
|
||||
return _noop
|
||||
lock = threading.Lock()
|
||||
|
||||
def _send(event: str, payload: dict) -> None:
|
||||
message = json.dumps(
|
||||
{
|
||||
"command": ["script-message",
|
||||
event,
|
||||
json.dumps(payload)]
|
||||
},
|
||||
ensure_ascii=False
|
||||
)
|
||||
encoded = message.encode("utf-8") + b"\n"
|
||||
with lock:
|
||||
try:
|
||||
ipc.write(encoded)
|
||||
ipc.flush()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return _send
|
||||
|
||||
|
||||
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
    """Yield each line of *stream* with its trailing CR/LF stripped."""
    yield from (raw_line.rstrip("\r\n") for raw_line in stream)
|
||||
|
||||
|
||||
def _run_task(args, parser) -> int:
    """Run a subprocess, streaming its output as mpv IPC task events.

    Emits "downlow-task-event" script-messages (start / stdout / stderr /
    exit) over the IPC endpoint named by ``args.ipc`` and returns the
    child's exit code. *args*/*parser* come from argparse; ``parser.error``
    exits the process on invalid input.
    """
    if not args.command:
        parser.error(
            'run-task requires a command to execute (use "--" before the command).'
        )
    # Build the child environment: inherited, with KEY=VALUE overrides applied.
    env = os.environ.copy()
    for entry in args.env:
        key, sep, value = entry.partition("=")
        if not sep:
            parser.error(f"Invalid environment variable definition: {entry!r}")
        env[key] = value
    command = list(args.command)
    # argparse may leave the "--" separator as the first token; drop it.
    if command and command[0] == "--":
        command.pop(0)
    # notifier is a no-op when the IPC connection cannot be established.
    notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
    if not command:
        notifier(
            "downlow-task-event",
            {
                "id": args.task_id,
                "event": "error",
                "message": "No command provided after separator",
            },
        )
        log("[downlow.py] No command provided for run-task", file=sys.stderr)
        return 1
    # Map generic "python"-style launchers onto the current interpreter so the
    # task runs with the same Python that is running this script.
    if command and isinstance(command[0], str) and sys.executable:
        first = command[0].lower()
        if first in {"python",
                     "python3",
                     "py",
                     "python.exe",
                     "python3.exe",
                     "py.exe"}:
            command[0] = sys.executable
    if os.environ.get("DOWNLOW_DEBUG"):
        log(f"Launching command: {command}", file=sys.stderr)
    notifier(
        "downlow-task-event",
        {
            "id": args.task_id,
            "event": "start",
            "command": command,
            "cwd": args.cwd or os.getcwd(),
        },
    )

    popen_kwargs = {}
    if os.name == "nt":
        # Avoid flashing a console window when spawning console-subsystem executables.
        flags = 0
        try:
            flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000))
        except Exception:
            flags |= 0x08000000  # CREATE_NO_WINDOW literal fallback
        popen_kwargs["creationflags"] = flags
        try:
            si = subprocess.STARTUPINFO()
            si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            si.wShowWindow = subprocess.SW_HIDE
            popen_kwargs["startupinfo"] = si
        except Exception:
            pass  # STARTUPINFO unavailable; creationflags alone still helps
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=args.cwd or None,
            env=env,
            text=True,
            bufsize=1,  # line-buffered so output streams promptly
            universal_newlines=True,
            **popen_kwargs,
        )
    except FileNotFoundError as exc:
        notifier(
            "downlow-task-event",
            {
                "id": args.task_id,
                "event": "error",
                "message": f"Executable not found: {exc.filename}",
            },
        )
        log(f"{exc}", file=sys.stderr)
        return 1
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    def pump(stream: IO[str], label: str, sink: list[str]) -> None:
        # Forward each line as an IPC event while also accumulating it
        # for the end-of-run mirror below.
        for line in iter_stream(stream):
            sink.append(line)
            notifier(
                "downlow-task-event",
                {
                    "id": args.task_id,
                    "event": label,
                    "line": line,
                },
            )

    # Drain stdout/stderr on daemon threads to avoid pipe-buffer deadlock.
    threads = []
    if process.stdout:
        t_out = threading.Thread(
            target=pump,
            args=(process.stdout,
                  "stdout",
                  stdout_lines),
            daemon=True
        )
        t_out.start()
        threads.append(t_out)
    if process.stderr:
        t_err = threading.Thread(
            target=pump,
            args=(process.stderr,
                  "stderr",
                  stderr_lines),
            daemon=True
        )
        t_err.start()
        threads.append(t_err)
    return_code = process.wait()
    # Short join: the daemon threads exit when the pipes close.
    for t in threads:
        t.join(timeout=0.1)
    notifier(
        "downlow-task-event",
        {
            "id": args.task_id,
            "event": "exit",
            "returncode": return_code,
            "success": return_code == 0,
        },
    )
    # Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
    if stdout_lines:
        log("\n".join(stdout_lines))
    if stderr_lines:
        log("\n".join(stderr_lines), file=sys.stderr)
    return return_code
|
||||
@@ -142,6 +142,8 @@ class Store:
|
||||
BaseStore] = {}
|
||||
self._backend_errors: Dict[str,
|
||||
str] = {}
|
||||
self._backend_types: Dict[str,
|
||||
str] = {}
|
||||
self._load_backends()
|
||||
|
||||
def _maybe_register_temp_alias(
|
||||
@@ -179,6 +181,7 @@ class Store:
|
||||
# Keep original name working, but add an alias.
|
||||
if backend_name != "temp":
|
||||
self._backends["temp"] = backend
|
||||
self._backend_types["temp"] = store_type
|
||||
except Exception:
|
||||
return
|
||||
|
||||
@@ -187,6 +190,7 @@ class Store:
|
||||
if not isinstance(store_cfg, dict):
|
||||
store_cfg = {}
|
||||
|
||||
self._backend_types = {}
|
||||
classes_by_type = _discover_store_classes()
|
||||
for raw_store_type, instances in store_cfg.items():
|
||||
if not isinstance(instances, dict):
|
||||
@@ -232,6 +236,7 @@ class Store:
|
||||
|
||||
backend_name = str(kwargs.get("NAME") or instance_name)
|
||||
self._backends[backend_name] = backend
|
||||
self._backend_types[backend_name] = store_type
|
||||
|
||||
# If this is the configured temp directory, also alias it as 'temp'.
|
||||
self._maybe_register_temp_alias(
|
||||
@@ -249,6 +254,47 @@ class Store:
|
||||
f"[Store] Failed to register {store_cls.__name__} instance '{instance_name}': {exc}"
|
||||
)
|
||||
|
||||
def _resolve_backend_name(self,
                          backend_name: str) -> tuple[Optional[str], Optional[str]]:
    """Resolve *backend_name* to a registered backend key.

    Returns ``(name, None)`` on success, ``(None, message)`` when the alias
    is ambiguous, and ``(None, None)`` when nothing matches at all.
    """
    requested = str(backend_name or "")
    if requested in self._backends:
        return requested, None  # exact hit, no normalization needed

    wanted = _normalize_store_type(requested)

    # Tier 1: normalized (case/format-insensitive) match on backend names.
    by_name = [
        candidate for candidate in self._backends
        if _normalize_store_type(candidate) == wanted
    ]
    if len(by_name) == 1:
        return by_name[0], None
    if len(by_name) > 1:
        return None, f"Ambiguous store alias '{backend_name}' matches {by_name}"

    # Tier 2: exact match on the backend's declared store type.
    by_type = [
        candidate for candidate, store_type in self._backend_types.items()
        if store_type == wanted
    ]
    if len(by_type) == 1:
        return by_type[0], None
    if len(by_type) > 1:
        return None, (
            f"Ambiguous store alias '{backend_name}' matches type '{wanted}': {by_type}"
        )

    # Tier 3: prefix match on the store type (loosest tier, tried last).
    by_prefix = [
        candidate for candidate, store_type in self._backend_types.items()
        if store_type.startswith(wanted)
    ]
    if len(by_prefix) == 1:
        return by_prefix[0], None
    if len(by_prefix) > 1:
        return None, (
            f"Ambiguous store alias '{backend_name}' matches type prefix '{wanted}': {by_prefix}"
        )

    return None, None
|
||||
|
||||
def get_backend_error(self, backend_name: str) -> Optional[str]:
    """Return the recorded load error for *backend_name*, if any."""
    key = str(backend_name)
    return self._backend_errors.get(key)
|
||||
|
||||
@@ -277,14 +323,20 @@ class Store:
|
||||
return sorted(chosen.values())
|
||||
|
||||
def __getitem__(self, backend_name: str) -> BaseStore:
    """Return the backend for *backend_name*, resolving aliases.

    Raises KeyError for unknown names; when the resolver reports an
    ambiguity, its diagnostic message replaces the generic listing.
    (The diff residue that interleaved both KeyError messages in one raise
    is untangled here.)
    """
    if backend_name not in self._backends:
        resolved, err = self._resolve_backend_name(backend_name)
        if resolved:
            return self._backends[resolved]
        if err:
            # Ambiguity diagnostics are more actionable than the listing.
            raise KeyError(f"Unknown store backend: {backend_name}. {err}")
        raise KeyError(
            f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}"
        )
    return self._backends[backend_name]
|
||||
|
||||
def is_available(self, backend_name: str) -> bool:
    """True when *backend_name* resolves (directly or via alias) to a backend.

    The stale ``backend_name in self._backends`` return (pre-resolver
    residue) is removed: it short-circuited alias/type resolution, so
    aliases that `__getitem__` accepts would have reported unavailable.
    """
    resolved, _err = self._resolve_backend_name(backend_name)
    return resolved is not None
|
||||
|
||||
def try_add_url_for_pipe_object(self, pipe_obj: Any, url: str) -> bool:
|
||||
"""Best-effort helper: if `pipe_obj` contains `store` + `hash`, add `url` to that store backend.
|
||||
|
||||
@@ -244,7 +244,7 @@ class SharedArgs:
|
||||
description="Destination location",
|
||||
)
|
||||
|
||||
DELETE_FLAG = CmdletArg(
|
||||
DELETE = CmdletArg(
|
||||
"delete",
|
||||
type="flag",
|
||||
description="Delete the file and its .tag after successful operation.",
|
||||
@@ -2081,6 +2081,12 @@ def extract_url_from_result(result: Any) -> list[str]:
|
||||
_extend(result.metadata.get("url"))
|
||||
_extend(result.metadata.get("url"))
|
||||
_extend(result.metadata.get("url"))
|
||||
if isinstance(getattr(result, "full_metadata", None), dict):
|
||||
fm = getattr(result, "full_metadata", None)
|
||||
if isinstance(fm, dict):
|
||||
_extend(fm.get("url"))
|
||||
_extend(fm.get("url"))
|
||||
_extend(fm.get("url"))
|
||||
elif hasattr(result, "url") or hasattr(result, "url"):
|
||||
# Handle objects with url/url attribute
|
||||
_extend(getattr(result, "url", None))
|
||||
@@ -2090,6 +2096,11 @@ def extract_url_from_result(result: Any) -> list[str]:
|
||||
_extend(result.get("url"))
|
||||
_extend(result.get("url"))
|
||||
_extend(result.get("url"))
|
||||
fm = result.get("full_metadata")
|
||||
if isinstance(fm, dict):
|
||||
_extend(fm.get("url"))
|
||||
_extend(fm.get("url"))
|
||||
_extend(fm.get("url"))
|
||||
extra = result.get("extra")
|
||||
if isinstance(extra, dict):
|
||||
_extend(extra.get("url"))
|
||||
@@ -2531,6 +2542,30 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
|
||||
metadata["_tidal_track_details_fetched"] = True
|
||||
except Exception:
|
||||
pass
|
||||
if not metadata.get("url"):
|
||||
try:
|
||||
resp_info = httpx.get(
|
||||
"https://tidal-api.binimum.org/info/",
|
||||
params={"id": str(track_int)},
|
||||
timeout=10.0,
|
||||
)
|
||||
resp_info.raise_for_status()
|
||||
info_payload = resp_info.json()
|
||||
info_data = info_payload.get("data") if isinstance(info_payload, dict) else None
|
||||
if isinstance(info_data, dict) and info_data:
|
||||
try:
|
||||
for k, v in info_data.items():
|
||||
if k not in metadata:
|
||||
metadata[k] = v
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if info_data.get("url"):
|
||||
metadata["url"] = info_data.get("url")
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -345,6 +345,14 @@ class Add_File(Cmdlet):
|
||||
else:
|
||||
items_to_process = [result]
|
||||
|
||||
total_items = len(items_to_process) if isinstance(items_to_process, list) else 0
|
||||
processed_items = 0
|
||||
try:
|
||||
if total_items:
|
||||
progress.set_percent(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Minimal step-based progress for single-item runs.
|
||||
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
|
||||
use_steps = False
|
||||
@@ -496,9 +504,25 @@ class Add_File(Cmdlet):
|
||||
and len(items_to_process) > 1
|
||||
)
|
||||
|
||||
for item in items_to_process:
|
||||
for idx, item in enumerate(items_to_process, 1):
|
||||
pipe_obj = coerce_to_pipe_object(item, path_arg)
|
||||
|
||||
try:
|
||||
label = pipe_obj.title or pipe_obj.name
|
||||
if not label and pipe_obj.path:
|
||||
try:
|
||||
label = Path(str(pipe_obj.path)).name
|
||||
except Exception:
|
||||
label = pipe_obj.path
|
||||
if not label:
|
||||
label = "file"
|
||||
if total_items:
|
||||
pending_pct = int(round(((idx - 1) / max(1, total_items)) * 100))
|
||||
progress.set_percent(pending_pct)
|
||||
progress.set_status(f"adding {idx}/{total_items}: {label}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
temp_dir_to_cleanup: Optional[Path] = None
|
||||
delete_after_item = delete_after
|
||||
try:
|
||||
@@ -597,6 +621,14 @@ class Add_File(Cmdlet):
|
||||
shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
|
||||
except Exception:
|
||||
pass
|
||||
processed_items += 1
|
||||
try:
|
||||
pct = int(round((processed_items / max(1, total_items)) * 100))
|
||||
progress.set_percent(pct)
|
||||
if processed_items >= total_items:
|
||||
progress.clear_status()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Apply deferred url associations (bulk) before showing the final store table.
|
||||
if pending_url_associations:
|
||||
|
||||
289
cmdlet/convert_file.py
Normal file
289
cmdlet/convert_file.py
Normal file
@@ -0,0 +1,289 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, Optional
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from SYS.utils import sha256_file
|
||||
from . import _shared as sh
|
||||
from SYS import pipeline as ctx
|
||||
|
||||
# Re-export the shared cmdlet plumbing under short local names so the rest of
# this module reads like its sibling cmdlets.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_title_from_result = sh.extract_title_from_result
|
||||
|
||||
# File-extension groups used by _detect_kind to classify inputs/targets.
# Entries are lowercase with no leading dot.
VIDEO_EXTS = {"mp4", "mkv", "webm", "mov", "avi", "flv", "mpeg", "mpg", "m4v"}

AUDIO_EXTS = {"mp3", "m4a", "m4b", "aac", "flac", "wav", "ogg", "opus", "mka"}

IMAGE_EXTS = {"png", "jpg", "jpeg", "webp", "bmp", "tif", "tiff", "gif"}

# Document formats handled via pypandoc rather than ffmpeg.
DOC_EXTS = {"pdf", "mobi", "epub", "azw3", "txt", "rtf", "html", "htm", "md", "doc", "docx"}
|
||||
|
||||
|
||||
def _detect_kind(ext: str) -> str:
    """Classify a file extension as video/audio/image/doc, else "unknown"."""
    suffix = ext.lower().lstrip(".")
    for kind, members in (
        ("video", VIDEO_EXTS),
        ("audio", AUDIO_EXTS),
        ("image", IMAGE_EXTS),
        ("doc", DOC_EXTS),
    ):
        if suffix in members:
            return kind
    return "unknown"
|
||||
|
||||
|
||||
def _allowed(source_kind: str, target_kind: str) -> bool:
|
||||
if source_kind == target_kind:
|
||||
return True
|
||||
if source_kind == "video" and target_kind == "audio":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _ffmpeg_convert(
    input_path: Path,
    output_path: Path,
    target_kind: str,
    copy_metadata: bool,
) -> bool:
    """Convert *input_path* to *output_path* with ffmpeg; True on success."""
    binary = shutil.which("ffmpeg")
    if binary is None:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    argv = [binary, "-y", "-i", str(input_path)]
    if target_kind == "audio":
        argv.append("-vn")  # drop the video stream when extracting audio
    if copy_metadata:
        argv += ["-map_metadata", "0"]
    argv.append(str(output_path))

    debug(f"[convert-file] Running ffmpeg: {' '.join(argv)}")
    completed = subprocess.run(argv, capture_output=True, text=True)
    if completed.returncode == 0:
        return True
    log(f"ffmpeg error: {completed.stderr}", file=sys.stderr)
    return False
|
||||
|
||||
|
||||
def _doc_convert(input_path: Path, output_path: Path) -> bool:
    """Convert a document via pypandoc; True when the output file was produced."""
    try:
        import pypandoc  # type: ignore
    except Exception:
        log("pypandoc is required for document conversion; install pypandoc-binary", file=sys.stderr)
        return False

    # Target format comes from the output suffix; default to pdf when absent.
    fmt = output_path.suffix.lstrip(".").lower() or "pdf"

    try:
        pypandoc.convert_file(
            str(input_path),
            to=fmt,
            outputfile=str(output_path),
        )
    except OSError as exc:
        # Raised when the bundled pandoc binary itself is missing/broken.
        log(f"pandoc is missing or failed to run: {exc}", file=sys.stderr)
        return False
    except Exception as exc:
        log(f"pypandoc conversion failed: {exc}", file=sys.stderr)
        return False

    if output_path.exists():
        return True
    log("pypandoc conversion did not produce an output file", file=sys.stderr)
    return False
|
||||
|
||||
|
||||
# Cmdlet manifest consumed by the dispatcher: name, help text, and arg spec.
CMDLET = Cmdlet(
    name="convert-file",
    summary="Convert files between media/container formats (video, audio, image, documents).",
    usage="convert-file -to <format> [-path <file|dir>] [-delete] [-query format:<fmt>]",
    arg=[
        # "-to" doubles as the query key "format" and is mandatory.
        QueryArg("to", key="format", query_only=False, required=True,
                 description="Target format/extension (e.g., mp4, mp3, wav, jpg, pdf)."),
        SharedArgs.PATH,
        SharedArgs.QUERY,
        SharedArgs.DELETE,
    ],
    detail=[
        "Allows video↔video, audio↔audio, image↔image, doc↔doc, and video→audio conversions.",
        "Disallows incompatible conversions (e.g., video→pdf).",
        "Uses ffmpeg for media and pypandoc-binary (bundled pandoc) for document formats (mobi/epub→pdf/txt/etc).",
    ],
)
|
||||
|
||||
|
||||
def _resolve_output_path(input_path: Path, outdir: Optional[Path], target_ext: str) -> Path:
|
||||
base = input_path.stem
|
||||
directory = outdir if outdir is not None else input_path.parent
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
candidate = directory / f"{base}.{target_ext}"
|
||||
if candidate.exists():
|
||||
for i in range(1, 1000):
|
||||
alt = directory / f"{base}_{i}.{target_ext}"
|
||||
if not alt.exists():
|
||||
candidate = alt
|
||||
break
|
||||
return candidate
|
||||
|
||||
|
||||
def _coerce_item_path(item: Any) -> Optional[Path]:
    """Best-effort extraction of a filesystem path from one piped item.

    Accepts dicts (``path``/``target`` keys), objects with a ``path``
    attribute, or bare path-like values. Returns None when no usable
    path can be derived.
    """
    if isinstance(item, dict):
        raw = item.get("path") or item.get("target")
    elif hasattr(item, "path"):
        raw = getattr(item, "path")
    else:
        raw = item
    try:
        return Path(str(raw)) if raw else None
    except Exception:
        return None


def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Execute convert-file: convert each piped/selected file to the target format.

    Returns 0 when at least one input converted successfully, 1 otherwise.
    """
    opts = parse_cmdlet_args(args, CMDLET)

    raw_fmt = opts.get("to") or opts.get("format")
    if not raw_fmt:
        log("-to <format> is required", file=sys.stderr)
        return 1

    target_fmt = str(raw_fmt).lower().lstrip(".")
    target_kind = _detect_kind(target_fmt)
    if target_kind == "unknown":
        log(f"Unsupported target format: {target_fmt}", file=sys.stderr)
        return 1

    delete_src = bool(opts.get("delete", False))
    inputs = normalize_result_input(result)

    # -path may name either an output directory or one extra input file.
    outdir_override: Optional[Path] = None
    path_arg = opts.get("path")
    if path_arg:
        try:
            candidate = Path(str(path_arg)).expanduser()
            if candidate.exists() and candidate.is_dir():
                outdir_override = candidate
            else:
                inputs.append({"path": candidate})
        except Exception:
            inputs.append({"path": path_arg})

    if not inputs:
        log("No input provided to convert-file", file=sys.stderr)
        return 1

    converted_count = 0

    for item in inputs:
        src = _coerce_item_path(item)
        if not src or not src.exists() or not src.is_file():
            log("convert-file: input path missing or not found", file=sys.stderr)
            continue

        source_kind = _detect_kind(src.suffix.lower().lstrip("."))
        if not _allowed(source_kind, target_kind):
            log(
                f"Conversion from {source_kind or 'unknown'} to {target_kind} is not allowed",
                file=sys.stderr,
            )
            continue

        dest = _resolve_output_path(src, outdir_override, target_fmt)

        # Route to the right converter for the target family.
        if target_kind in {"video", "audio", "image"}:
            ok = _ffmpeg_convert(src, dest, target_kind, copy_metadata=True)
        elif target_kind == "doc":
            ok = _doc_convert(src, dest)
        else:
            log(f"No converter for target kind {target_kind}", file=sys.stderr)
            ok = False
        if not ok:
            continue

        try:
            out_hash = sha256_file(dest)
        except Exception:
            out_hash = None

        ctx.emit({
            "path": str(dest),
            "title": extract_title_from_result(item) or dest.stem,
            "hash": out_hash,
            "media_kind": target_kind,
            "source_path": str(src),
        })

        if delete_src:
            try:
                src.unlink()
                log(f"Deleted source file: {src}", file=sys.stderr)
            except Exception as exc:
                log(f"Failed to delete source {src}: {exc}", file=sys.stderr)

        converted_count += 1

    return 0 if converted_count else 1
|
||||
|
||||
|
||||
# Attach the executor and publish the cmdlet in the global registry.
CMDLET.exec = _run
CMDLET.register()
|
||||
@@ -22,8 +22,8 @@ from Provider import internetarchive as ia_provider
|
||||
from Provider import alldebrid as ad_provider
|
||||
from Provider import openlibrary as ol_provider
|
||||
|
||||
from SYS.download import DownloadError, _download_direct_file
|
||||
from SYS.models import DownloadOptions, DownloadMediaResult
|
||||
from API.HTTP import _download_direct_file
|
||||
from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult
|
||||
from SYS.logger import log, debug
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from SYS.result_table import ResultTable
|
||||
@@ -890,7 +890,6 @@ class Download_File(Cmdlet):
|
||||
return expanded_items
|
||||
|
||||
def _process_provider_items(
|
||||
self,
|
||||
*,
|
||||
piped_items: Sequence[Any],
|
||||
final_output_dir: Path,
|
||||
@@ -900,8 +899,9 @@ class Download_File(Cmdlet):
|
||||
registry: Dict[str,
|
||||
Any],
|
||||
progress: PipelineProgress,
|
||||
) -> int:
|
||||
) -> tuple[int, int]:
|
||||
downloaded_count = 0
|
||||
queued_magnet_submissions = 0
|
||||
get_search_provider = registry.get("get_search_provider")
|
||||
SearchResult = registry.get("SearchResult")
|
||||
|
||||
@@ -911,8 +911,17 @@ class Download_File(Cmdlet):
|
||||
config=config
|
||||
)
|
||||
|
||||
total_items = len(expanded_items)
|
||||
processed_items = 0
|
||||
try:
|
||||
if total_items:
|
||||
progress.set_percent(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for item in expanded_items:
|
||||
try:
|
||||
label = "item"
|
||||
table = get_field(item, "table")
|
||||
title = get_field(item, "title")
|
||||
target = get_field(item, "path") or get_field(item, "url")
|
||||
@@ -933,6 +942,25 @@ class Download_File(Cmdlet):
|
||||
if isinstance(extra_md, dict):
|
||||
full_metadata = extra_md
|
||||
|
||||
try:
|
||||
label = title or target
|
||||
label = str(label or "item").strip()
|
||||
if total_items:
|
||||
pct = int(round((processed_items / max(1, total_items)) * 100))
|
||||
progress.set_percent(pct)
|
||||
progress.set_status(
|
||||
f"downloading {processed_items + 1}/{total_items}: {label}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
transfer_label = label
|
||||
if str(table or "").lower() == "hifi":
|
||||
try:
|
||||
progress.begin_transfer(label=transfer_label, total=None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If this looks like a provider item and providers are available, prefer provider.download()
|
||||
downloaded_path: Optional[Path] = None
|
||||
attempted_provider_download = False
|
||||
@@ -1065,6 +1093,45 @@ class Download_File(Cmdlet):
|
||||
|
||||
continue
|
||||
|
||||
# Magnet targets (e.g., torrent provider results) -> submit/download via AllDebrid
|
||||
if downloaded_path is None and isinstance(target, str) and is_magnet_link(str(target)):
|
||||
magnet_spec = ad_provider.resolve_magnet_spec(str(target))
|
||||
if magnet_spec:
|
||||
|
||||
def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None:
|
||||
title_hint = metadata.get("name") or relpath or title
|
||||
self._emit_local_file(
|
||||
downloaded_path=path,
|
||||
source=file_url or target,
|
||||
title_hint=title_hint,
|
||||
tags_hint=None,
|
||||
media_kind_hint="file",
|
||||
full_metadata=metadata,
|
||||
progress=progress,
|
||||
config=config,
|
||||
provider_hint="alldebrid",
|
||||
)
|
||||
|
||||
downloaded, magnet_id = ad_provider.download_magnet(
|
||||
magnet_spec,
|
||||
str(target),
|
||||
final_output_dir,
|
||||
config,
|
||||
progress,
|
||||
quiet_mode,
|
||||
self._path_from_download_result,
|
||||
_on_emit,
|
||||
)
|
||||
|
||||
if downloaded > 0:
|
||||
downloaded_count += downloaded
|
||||
continue
|
||||
|
||||
# If queued but not yet ready, skip the generic unsupported-target error.
|
||||
if magnet_id is not None:
|
||||
queued_magnet_submissions += 1
|
||||
continue
|
||||
|
||||
# Fallback: if we have a direct HTTP URL, download it directly
|
||||
if (downloaded_path is None and isinstance(target,
|
||||
str)
|
||||
@@ -1080,6 +1147,7 @@ class Download_File(Cmdlet):
|
||||
file=sys.stderr,
|
||||
)
|
||||
continue
|
||||
|
||||
debug(
|
||||
f"[download-file] Provider item looks like direct URL, downloading: {target}"
|
||||
)
|
||||
@@ -1150,8 +1218,22 @@ class Download_File(Cmdlet):
|
||||
log(f"Download failed: {e}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Error downloading item: {e}", file=sys.stderr)
|
||||
finally:
|
||||
if str(table or "").lower() == "hifi":
|
||||
try:
|
||||
progress.finish_transfer(label=transfer_label)
|
||||
except Exception:
|
||||
pass
|
||||
processed_items += 1
|
||||
try:
|
||||
pct = int(round((processed_items / max(1, total_items)) * 100))
|
||||
progress.set_percent(pct)
|
||||
if processed_items >= total_items:
|
||||
progress.clear_status()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return downloaded_count
|
||||
return downloaded_count, queued_magnet_submissions
|
||||
|
||||
# === Streaming helpers (yt-dlp) ===
|
||||
|
||||
@@ -2687,6 +2769,15 @@ class Download_File(Cmdlet):
|
||||
|
||||
debug(f"Output directory: {final_output_dir}")
|
||||
|
||||
try:
|
||||
PipelineProgress(pipeline_context).ensure_local_ui(
|
||||
label="download-file",
|
||||
total_items=len(supported_url),
|
||||
items_preview=supported_url,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
clip_spec = parsed.get("clip")
|
||||
query_spec = parsed.get("query")
|
||||
|
||||
@@ -3572,7 +3663,7 @@ class Download_File(Cmdlet):
|
||||
if early_exit is not None:
|
||||
return int(early_exit)
|
||||
|
||||
downloaded_count += self._process_provider_items(
|
||||
provider_downloaded, magnet_submissions = self._process_provider_items(
|
||||
piped_items=piped_items,
|
||||
final_output_dir=final_output_dir,
|
||||
config=config,
|
||||
@@ -3580,9 +3671,13 @@ class Download_File(Cmdlet):
|
||||
registry=registry,
|
||||
progress=progress,
|
||||
)
|
||||
downloaded_count += provider_downloaded
|
||||
|
||||
if downloaded_count > 0 or streaming_downloaded > 0:
|
||||
debug(f"✓ Successfully processed {downloaded_count} file(s)")
|
||||
if downloaded_count > 0 or streaming_downloaded > 0 or magnet_submissions > 0:
|
||||
msg = f"✓ Successfully processed {downloaded_count} file(s)"
|
||||
if magnet_submissions:
|
||||
msg += f" and queued {magnet_submissions} magnet(s)"
|
||||
debug(msg)
|
||||
return 0
|
||||
|
||||
if streaming_exit_code is not None:
|
||||
|
||||
@@ -255,7 +255,7 @@ def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
|
||||
|
||||
# Prefer a true support check when the Python module is available.
|
||||
try:
|
||||
from SYS.download import is_url_supported_by_ytdlp
|
||||
from tool.ytdlp import is_url_supported_by_ytdlp
|
||||
|
||||
for text in candidates:
|
||||
try:
|
||||
|
||||
@@ -246,7 +246,7 @@ class search_file(Cmdlet):
|
||||
else:
|
||||
table_title = f"{provider_label}: {query}".strip().rstrip(":")
|
||||
|
||||
preserve_order = provider_lower in {"youtube", "openlibrary", "loc"}
|
||||
preserve_order = provider_lower in {"youtube", "openlibrary", "loc", "torrent"}
|
||||
table_type = provider_name
|
||||
table_meta: Dict[str, Any] = {"provider": provider_name}
|
||||
if provider_lower == "hifi":
|
||||
|
||||
@@ -444,7 +444,7 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
|
||||
url = _maybe_unlock_alldebrid_url(url, config)
|
||||
|
||||
try:
|
||||
from SYS.download import _download_direct_file
|
||||
from API.HTTP import _download_direct_file
|
||||
|
||||
base_tmp = None
|
||||
if isinstance(config, dict):
|
||||
|
||||
165
docs/provider_guide.md
Normal file
165
docs/provider_guide.md
Normal file
@@ -0,0 +1,165 @@
|
||||
# Provider Development Guide
|
||||
|
||||
## 🎯 Purpose
|
||||
This guide describes how to write, test, and register a provider so the application can discover and use it as a pluggable component.
|
||||
|
||||
> Keep provider code small, focused, and well-tested. Use existing providers as examples.
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Anatomy of a Provider
|
||||
A provider is a Python class that extends `ProviderCore.base.Provider` and implements a few key methods and attributes.
|
||||
|
||||
Minimum expectations:
|
||||
- `class MyProvider(Provider):` — subclass the base provider
|
||||
- `URL` / `URL_DOMAINS` or `url_patterns()` — to let the registry route URLs
|
||||
- `validate(self) -> bool` — return True when provider is configured and usable
|
||||
- `search(self, query, limit=50, filters=None, **kwargs)` — return a list of `SearchResult`
|
||||
|
||||
Optional but common:
|
||||
- `download(self, result: SearchResult, output_dir: Path) -> Optional[Path]` — download a provider result
|
||||
- `selector(self, selected_items, *, ctx, stage_is_last=True, **kwargs) -> bool` — handle `@N` selections
|
||||
- `download_url(self, url, output_dir, progress_cb=None)` — direct URL-handling helper
|
||||
|
||||
---
|
||||
|
||||
## 🧩 SearchResult
|
||||
Use `ProviderCore.base.SearchResult` to describe results returned by `search()`.
|
||||
Important fields:
|
||||
- `table` (str) — provider table name
|
||||
- `title` (str) — short human title
|
||||
- `path` (str) — canonical URL / link the provider/dl may use
|
||||
- `media_kind` (str) — `file`, `folder`, `book`, etc.
|
||||
- `columns` (list[tuple[str,str]]) — extra key/value pairs to display
|
||||
- `full_metadata` (dict) — provider-specific metadata for downstream stages
|
||||
- `annotations` / `tag` — simple metadata for filtering
|
||||
|
||||
Return a list of `SearchResult(...)` objects or simple dicts convertible with `.to_dict()`.
|
||||
|
||||
---
|
||||
|
||||
## ✅ Implementing search()
|
||||
- Parse and sanitize `query` and `filters`.
|
||||
- Return no more than `limit` results.
|
||||
- Use `columns` to provide table columns (TITLE, Seeds, Size, etc.).
|
||||
- Keep `search()` fast and predictable (apply reasonable timeouts).
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
|
||||
class HelloProvider(Provider):
|
||||
def search(self, query, limit=50, filters=None, **kwargs):
|
||||
q = (query or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
results = []
|
||||
# Build up results
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="hello",
|
||||
title=f"Hit for {q}",
|
||||
path=f"https://example/{q}",
|
||||
columns=[("Info", "example")],
|
||||
full_metadata={"source": "hello"},
|
||||
)
|
||||
)
|
||||
return results[:max(0, int(limit))]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⬇️ Implementing download() and download_url()
|
||||
- Prefer provider `download(self, result, output_dir)` for piped provider items.
|
||||
- For provider-provided URLs, implement `download_url` to allow `download-file` to route downloads through providers.
|
||||
- Use the repo `_download_direct_file` helper for HTTP downloads when possible.
|
||||
|
||||
Example download():
|
||||
|
||||
```python
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
# Validate config
|
||||
url = getattr(result, "path", None)
|
||||
if not url or not url.startswith("http"):
|
||||
return None
|
||||
# use existing helpers to fetch the file
|
||||
return _download_direct_file(url, output_dir)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🧭 URL routing
|
||||
Providers can declare:
|
||||
- `URL = ("magnet:",)` or similar prefix list
|
||||
- `URL_DOMAINS = ("example.com",)` to match hosts
|
||||
- Or override `@classmethod def url_patterns(cls):` to combine static and dynamic patterns
|
||||
|
||||
The registry uses these to match `download-file <url>` or to pick which provider should handle the URL.
|
||||
|
||||
---
|
||||
|
||||
## 🛠 Selector (handling `@N` picks)
|
||||
- Implement `selector(self, selected_items, *, ctx, stage_is_last=True)` to present a sub-table or to enqueue downloads.
|
||||
- Use `ctx.set_last_result_table()` and `ctx.set_current_stage_table()` to display follow-ups.
|
||||
- Return `True` when you handled the selection and the pipeline should pause or proceed accordingly.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing providers
|
||||
- Keep tests small and local. Create `tests/test_provider_<name>.py`.
|
||||
- Test `search()` with mock HTTP responses (use `requests-mock` or similar).
|
||||
- Test `download()` using a temp directory and a small file server or by mocking `_download_direct_file`.
|
||||
- Test `selector()` by constructing a fake result and `ctx` object.
|
||||
|
||||
Example PowerShell commands to run tests (repo root):
|
||||
|
||||
```powershell
|
||||
# Run a single test file
|
||||
pytest tests/test_provider_hello.py -q
|
||||
|
||||
# Run all tests
|
||||
pytest -q
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📦 Registration & packaging
|
||||
- Add your provider module under `Provider/` and ensure it is imported by module package initialization. Common approach:
|
||||
- Place file `Provider/myprovider.py`
|
||||
- Ensure `Provider/__init__.py` imports the module (or the registry auto-discovers by package import)
|
||||
- If the project has a central provider registry, add lookup helpers there (e.g., `ProviderCore/registry.py`). Usually providers register themselves at import time.
|
||||
|
||||
---
|
||||
|
||||
## 💡 Best practices & tips
|
||||
- Use `debug()` / `log()` appropriately; avoid noisy stderr output in normal runs.
|
||||
- Prefer returning `SearchResult` objects to provide consistent UX.
|
||||
- Keep `search()` tolerant (timeouts, malformed responses) and avoid raising for expected network problems.
|
||||
- Use `full_metadata` to pass non-display data to `download()` and `selector()`.
|
||||
- Respect the `limit` parameter in `search()`.
|
||||
|
||||
---
|
||||
|
||||
## 🧾 Example provider checklist
|
||||
- [ ] Implement `search()` and return `SearchResult` items
|
||||
- [ ] Implement `validate()` to check essential config (API keys, credentials)
|
||||
- [ ] Provide `URL` / `URL_DOMAINS` or `url_patterns()` for routing
|
||||
- [ ] Add `download()` or `download_url()` for piped/passed URL downloads
|
||||
- [ ] Add tests under `tests/`
|
||||
- [ ] Add module to `Provider/` package and ensure import/registration
|
||||
|
||||
---
|
||||
|
||||
## 🔗 Further reading
|
||||
- See existing providers in `Provider/` for patterns and edge cases.
|
||||
- Check `API/` helpers for HTTP and debrid clients.
|
||||
|
||||
|
||||
---
|
||||
|
||||
A minimal example provider and matching unit tests ship with this repository:
- `Provider/hello_provider.py` — a template provider implementation.
- `tests/test_provider_hello.py` — unit tests exercising it.

Use them as a starting point when writing new providers.
|
||||
@@ -19,6 +19,7 @@ pypdf>=3.0.0
|
||||
mutagen>=1.46.0
|
||||
cbor2>=4.0
|
||||
zstandard>=0.23.0
|
||||
pypandoc-binary
|
||||
|
||||
# Image and media support
|
||||
Pillow>=10.0.0
|
||||
@@ -45,3 +46,4 @@ playwright>=1.40.0
|
||||
|
||||
# Development and utilities
|
||||
python-dateutil>=2.8.0
|
||||
|
||||
|
||||
10
tmp_trim_registry.py
Normal file
10
tmp_trim_registry.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from pathlib import Path

# One-off maintenance script: ProviderCore/registry.py ended up with its
# module docstring (and everything following it) duplicated. Trim the file
# back to the text preceding the second occurrence of the docstring marker.
path = Path("ProviderCore/registry.py")
# Explicit UTF-8 on read: the write below already forces UTF-8, and relying
# on the platform default (e.g. cp1252 on Windows) could mis-decode the file.
text = path.read_text(encoding="utf-8")
marker = '"""Provider registry.'
first = text.find(marker)
second = text.find(marker, first + 1)
if second != -1:  # no-op when the docstring is not duplicated
    trimmed = text[:second].rstrip() + "\n"
    path.write_text(trimmed, encoding="utf-8")
||||
3
tmp_write_registry.py
Normal file
3
tmp_write_registry.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from pathlib import Path

# Placeholder for regenerated registry content (intentionally empty).
new_content = ""
|
||||
@@ -29,6 +29,9 @@ from SYS.models import (
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from SYS.utils import ensure_directory, sha256_file
|
||||
|
||||
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
from yt_dlp.extractor import gen_extractors # type: ignore
|
||||
@@ -565,9 +568,35 @@ class YtDlpTool:
|
||||
|
||||
# Progress + utility helpers for yt-dlp driven downloads (previously in cmdlet/download_media).
|
||||
_YTDLP_PROGRESS_BAR = ProgressBar()
|
||||
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
|
||||
_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc")
|
||||
|
||||
|
||||
def _progress_label(status: Dict[str, Any]) -> str:
|
||||
info_dict = status.get("info_dict") if isinstance(status.get("info_dict"), dict) else {}
|
||||
|
||||
candidates = [
|
||||
status.get("filename"),
|
||||
info_dict.get("_filename"),
|
||||
info_dict.get("filepath"),
|
||||
info_dict.get("title"),
|
||||
info_dict.get("id"),
|
||||
]
|
||||
|
||||
for cand in candidates:
|
||||
if not cand:
|
||||
continue
|
||||
try:
|
||||
name = Path(str(cand)).name
|
||||
except Exception:
|
||||
name = str(cand)
|
||||
label = str(name or "").strip()
|
||||
if label:
|
||||
return label
|
||||
|
||||
return "download"
|
||||
|
||||
|
||||
def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
|
||||
ui = None
|
||||
try:
|
||||
@@ -937,19 +966,53 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||
|
||||
|
||||
def _progress_callback(status: Dict[str, Any]) -> None:
|
||||
label = _progress_label(status)
|
||||
event = status.get("status")
|
||||
if event == "downloading":
|
||||
downloaded = status.get("downloaded_bytes")
|
||||
total = status.get("total_bytes") or status.get("total_bytes_estimate")
|
||||
downloaded = status.get("downloaded_bytes")
|
||||
total = status.get("total_bytes") or status.get("total_bytes_estimate")
|
||||
|
||||
_YTDLP_PROGRESS_BAR.update(
|
||||
downloaded=int(downloaded) if downloaded is not None else None,
|
||||
total=int(total) if total is not None else None,
|
||||
label="download",
|
||||
file=sys.stderr,
|
||||
)
|
||||
pipeline = PipelineProgress(pipeline_context)
|
||||
live_ui, _ = pipeline.ui_and_pipe_index()
|
||||
use_live = live_ui is not None
|
||||
|
||||
def _total_bytes(value: Any) -> Optional[int]:
|
||||
try:
|
||||
if isinstance(value, (int, float)) and value > 0:
|
||||
return int(value)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
if event == "downloading":
|
||||
if use_live:
|
||||
try:
|
||||
if not _YTDLP_TRANSFER_STATE.get(label, {}).get("started"):
|
||||
pipeline.begin_transfer(label=label, total=_total_bytes(total))
|
||||
_YTDLP_TRANSFER_STATE[label] = {"started": True}
|
||||
pipeline.update_transfer(
|
||||
label=label,
|
||||
completed=int(downloaded) if downloaded is not None else None,
|
||||
total=_total_bytes(total),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
_YTDLP_PROGRESS_BAR.update(
|
||||
downloaded=int(downloaded) if downloaded is not None else None,
|
||||
total=int(total) if total is not None else None,
|
||||
label=label,
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif event == "finished":
|
||||
_YTDLP_PROGRESS_BAR.finish()
|
||||
if use_live:
|
||||
try:
|
||||
if _YTDLP_TRANSFER_STATE.get(label, {}).get("started"):
|
||||
pipeline.finish_transfer(label=label)
|
||||
except Exception:
|
||||
pass
|
||||
_YTDLP_TRANSFER_STATE.pop(label, None)
|
||||
else:
|
||||
_YTDLP_PROGRESS_BAR.finish()
|
||||
elif event in ("postprocessing", "processing"):
|
||||
return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user