This commit is contained in:
2026-01-05 07:51:19 -08:00
parent 8545367e28
commit 1f765cffda
32 changed files with 3447 additions and 3250 deletions

View File

@@ -10,10 +10,24 @@ Provides synchronous and asynchronous HTTP operations with:
import httpx
import asyncio
from typing import Optional, Dict, Any, Callable, BinaryIO
import sys
import time
import traceback
import re
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging
from SYS.logger import debug, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file
try: # Optional; used for metadata extraction when available
from SYS.metadata import extract_ytdlp_tags
except Exception: # pragma: no cover - optional dependency
extract_ytdlp_tags = None # type: ignore[assignment]
logger = logging.getLogger(__name__)
# Default configuration
@@ -366,6 +380,359 @@ class HTTPClient:
return self._client.stream(method, url, **kwargs)
def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks.

    Filename resolution order: query-string hints (?filename=/download=/file=/name=),
    Content-Disposition from a best-effort HEAD probe, the URL path, then
    *suggested_filename*; when the name has no extension, one is inferred from
    Content-Type.  URLs that serve text/html are rejected so web pages are
    never saved as "files".

    Args:
        url: Direct URL to fetch.
        output_dir: Destination directory (created if missing).
        debug_logger: Optional structured debug sink.
        quiet: Suppress progress/diagnostic output when True.
        suggested_filename: Caller-preferred name for the saved file.
        pipeline_progress: Optional progress adapter exposing
            begin_transfer/update_transfer/finish_transfer.

    Returns:
        DownloadMediaResult describing the saved file (path, info dict, tags, hash).

    Raises:
        DownloadError: When no filename can be determined, the URL appears to
            be an HTML page, or the transfer fails.
    """
    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization: keep only the last path segment,
        # replace control/invalid characters with spaces, collapse whitespace.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]
        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        # Append " (n)" before the suffix until the name is free; fall back to
        # a timestamp suffix in the (pathological) case of 10k collisions.
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    parsed_url = urlparse(url)
    url_path = parsed_url.path

    # 1) Filename hints embedded in the query string.
    filename: Optional[str] = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)
        if "?" in filename:
            filename = filename.split("?")[0]

    # 2) Best-effort HEAD probe for Content-Disposition / Content-Type.
    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""
            if content_disposition:
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            # BUGFIX: the placeholder had been lost from this f-string.
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)

    # 3) Probe suspicious targets (server-page extensions or HTML content type)
    #    with a streamed GET before committing to a download.
    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""
        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)
        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        # Probe is best-effort; genuine network failures surface in the real GET.
        pass

    # 4) A caller-suggested name wins; borrow the detected extension when it lacks one.
    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested

    # 5) Infer an extension from Content-Type when the name still has none.
    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        has_ext = False
    if filename and (not has_ext):
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }
        if ct in ext_by_ct:
            # BUGFIX: append the mapped extension to the resolved name instead of
            # discarding the name (it had been replaced by a literal placeholder).
            filename = f"{filename}{ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")
    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )
    file_path = _unique_path(output_dir / str(filename))

    # Progress reporting: prefer the pipeline UI when one is attached,
    # otherwise fall back to a local stderr progress bar.
    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False
    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()
    transfer_started = [False]

    def _close_progress() -> None:
        # Best-effort teardown of both progress surfaces; shared by the
        # success path and both error paths (previously triplicated inline).
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

    if not quiet:
        # BUGFIX: the placeholder had been lost from this f-string.
        debug(f"Direct download: {url}")
    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False

        def _maybe_begin_transfer(content_length: int) -> None:
            # Lazily open the pipeline transfer once; marks started only after
            # begin_transfer actually succeeds.
            if pipeline_progress is None or transfer_started[0]:
                return
            try:
                total_val: Optional[int] = (
                    int(content_length)
                    if isinstance(content_length, int) and content_length > 0
                    else None
                )
            except Exception:
                total_val = None
            try:
                if hasattr(pipeline_progress, "begin_transfer"):
                    pipeline_progress.begin_transfer(
                        label=str(filename or "download"),
                        total=total_val,
                    )
                    transfer_started[0] = True
            except Exception:
                return

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            # Fan progress out to whichever reporting surface is active.
            downloaded_bytes[0] = int(bytes_downloaded or 0)
            try:
                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
                    _maybe_begin_transfer(content_length)
                    total_val: Optional[int] = (
                        int(content_length)
                        if isinstance(content_length, int) and content_length > 0
                        else None
                    )
                    pipeline_progress.update_transfer(
                        label=str(filename or "download"),
                        completed=int(bytes_downloaded or 0),
                        total=total_val,
                    )
            except Exception:
                pass
            if progress_bar is not None:
                progress_bar.update(
                    downloaded=int(bytes_downloaded or 0),
                    total=int(content_length) if content_length and content_length > 0 else None,
                    label=str(filename or "download"),
                    file=sys.stderr,
                )

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)
        elapsed = time.time() - start_time
        _close_progress()
        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s")
        ext_out = ""
        try:
            ext_out = Path(str(filename)).suffix.lstrip(".")
        except Exception:
            ext_out = ""
        info: Dict[str, Any] = {
            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
            "ext": ext_out,
            "webpage_url": url,
        }
        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass
        tags: List[str] = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as exc:
                log(f"Error extracting tags: {exc}", file=sys.stderr)
        # Guarantee a title tag: retry extraction with the filename as title.
        if not any(str(t).startswith("title:") for t in tags):
            info["title"] = str(filename)
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as exc:
                    log(f"Error extracting tags with filename: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )
        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )
    except (httpx.HTTPError, httpx.RequestError) as exc:
        _close_progress()
        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc
    except Exception as exc:
        _close_progress()
        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc
# Back-compat alias: older call sites imported the underscore-prefixed name.
_download_direct_file = download_direct_file
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""

View File

@@ -11,6 +11,7 @@ import shutil
import subprocess
import sys
import time
from collections import deque
from SYS.logger import log
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
@@ -18,8 +19,8 @@ import tempfile
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Iterable, Optional, Sequence, Type, TypeVar, Union, cast
from urllib.parse import urlsplit, urlencode, quote
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, cast
from urllib.parse import urlsplit, urlencode, quote, urlunsplit, unquote
import httpx
logger = logging.getLogger(__name__)
@@ -1828,3 +1829,742 @@ def download_hydrus_file(
print_final_progress(filename, file_size, elapsed)
return downloaded
# ============================================================================
# Hydrus metadata helpers (moved from SYS.metadata)
# ============================================================================
def _normalize_hash(value: Any) -> str:
candidate = str(value or "").strip().lower()
if not candidate:
raise ValueError("Hydrus hash is required")
if len(candidate) != 64 or any(ch not in "0123456789abcdef" for ch in candidate):
raise ValueError("Hydrus hash must be a 64-character hex string")
return candidate
def _normalize_tag(tag: Any) -> Optional[str]:
if tag is None:
return None
if isinstance(tag, str):
candidate = tag.strip()
else:
candidate = str(tag).strip()
return candidate or None
def _dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
if not tags:
return []
namespace_to_tags: Dict[Optional[str], List[Tuple[int, str]]] = {}
first_appearance: Dict[Optional[str], int] = {}
for idx, tag in enumerate(tags):
namespace: Optional[str] = tag.split(":", 1)[0] if ":" in tag else None
if namespace not in first_appearance:
first_appearance[namespace] = idx
if namespace not in namespace_to_tags:
namespace_to_tags[namespace] = []
namespace_to_tags[namespace].append((idx, tag))
result: List[Tuple[int, str]] = []
for namespace, tag_list in namespace_to_tags.items():
chosen_tag = tag_list[0][1] if keep_first else tag_list[-1][1]
result.append((first_appearance[namespace], chosen_tag))
result.sort(key=lambda x: x[0])
return [tag for _, tag in result]
def _extract_tag_services(entry: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build per-service tag records from a Hydrus metadata entry.

    Handles both the modern "service_keys_to_statuses_to_tags" mapping and
    older/alternative shapes where each service appears as its own dict inside
    the entry's "tags" section.  Only "current" tags (status key "0") are
    collected, and each record's tags are finally de-duplicated to one tag per
    namespace (first occurrence wins).
    """
    tags_section = entry.get("tags")
    services: List[Dict[str, Any]] = []
    if not isinstance(tags_section, dict):
        return services
    names_map = tags_section.get("service_keys_to_names")
    if not isinstance(names_map, dict):
        names_map = {}

    def get_record(service_key: Optional[str], service_name: Optional[str]) -> Dict[str, Any]:
        # Find (or create) the record for a service, matching case-insensitively
        # by key first and then by name; backfill whichever identifier the
        # existing record was missing.
        key_lower = service_key.lower() if isinstance(service_key, str) else None
        name_lower = service_name.lower() if isinstance(service_name, str) else None
        for record in services:
            existing_key = record.get("service_key")
            if key_lower and isinstance(existing_key, str) and existing_key.lower() == key_lower:
                if service_name and not record.get("service_name"):
                    record["service_name"] = service_name
                return record
            existing_name = record.get("service_name")
            if name_lower and isinstance(existing_name, str) and existing_name.lower() == name_lower:
                if service_key and not record.get("service_key"):
                    record["service_key"] = service_key
                return record
        record = {
            "service_key": service_key,
            "service_name": service_name,
            "tags": [],
        }
        services.append(record)
        return record

    def _iter_current_status_lists(container: Any) -> Iterable[List[Any]]:
        # Yield only the tag lists filed under status "0" (current tags);
        # a bare list is treated as already-current.
        if isinstance(container, dict):
            for status_key, tags_list in container.items():
                if str(status_key) != "0":
                    continue
                if isinstance(tags_list, list):
                    yield tags_list
        elif isinstance(container, list):
            yield container

    # Preferred modern shape: service key -> status -> tag list.
    statuses_map = tags_section.get("service_keys_to_statuses_to_tags")
    if isinstance(statuses_map, dict):
        for service_key, status_map in statuses_map.items():
            record = get_record(service_key if isinstance(service_key, str) else None, names_map.get(service_key))
            for tags_list in _iter_current_status_lists(status_map):
                for tag in tags_list:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)
    # Fallback: scan any other per-service dicts, skipping known bookkeeping keys.
    ignored_keys = {
        "service_keys_to_statuses_to_tags",
        "service_keys_to_statuses_to_display_tags",
        "service_keys_to_display_friendly_tags",
        "service_keys_to_names",
        "tag_display_types_to_namespaces",
        "namespace_display_string_lookup",
        "tag_display_decoration_colour_lookup",
    }
    for key, service in tags_section.items():
        if key in ignored_keys:
            continue
        if isinstance(service, dict):
            service_key = service.get("service_key") or (key if isinstance(key, str) else None)
            service_name = service.get("service_name") or service.get("name") or names_map.get(service_key)
            record = get_record(service_key if isinstance(service_key, str) else None, service_name)
            storage = service.get("storage_tags") or service.get("statuses_to_tags") or service.get("tags")
            if isinstance(storage, dict):
                for tags_list in _iter_current_status_lists(storage):
                    for tag in tags_list:
                        normalized = _normalize_tag(tag)
                        if normalized:
                            record["tags"].append(normalized)
            elif isinstance(storage, list):
                for tag in storage:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)
    # Collapse each record to one tag per namespace, preserving first-seen order.
    for record in services:
        record["tags"] = _dedup_tags_by_namespace(record["tags"], keep_first=True)
    return services
def _select_primary_tags(
services: List[Dict[str, Any]],
aggregated: List[str],
prefer_service: Optional[str]
) -> Tuple[Optional[str], List[str]]:
prefer_lower = prefer_service.lower() if isinstance(prefer_service, str) else None
if prefer_lower:
for record in services:
name = record.get("service_name")
if isinstance(name, str) and name.lower() == prefer_lower and record["tags"]:
return record.get("service_key"), record["tags"]
for record in services:
if record["tags"]:
return record.get("service_key"), record["tags"]
return None, aggregated
def _derive_title(
tags_primary: List[str],
tags_aggregated: List[str],
entry: Dict[str, Any]
) -> Optional[str]:
for source in (tags_primary, tags_aggregated):
for tag in source:
namespace, sep, value = tag.partition(":")
if sep and namespace and namespace.lower() == "title":
cleaned = value.strip()
if cleaned:
return cleaned
for key in (
"title",
"display_name",
"pretty_name",
"original_display_filename",
"original_filename",
):
value = entry.get(key)
if isinstance(value, str):
cleaned = value.strip()
if cleaned:
return cleaned
return None
def _derive_clip_time(
tags_primary: List[str],
tags_aggregated: List[str],
entry: Dict[str, Any]
) -> Optional[str]:
namespaces = {"clip", "clip_time", "cliptime"}
for source in (tags_primary, tags_aggregated):
for tag in source:
namespace, sep, value = tag.partition(":")
if sep and namespace and namespace.lower() in namespaces:
cleaned = value.strip()
if cleaned:
return cleaned
clip_value = entry.get("clip_time")
if isinstance(clip_value, str):
cleaned_clip = clip_value.strip()
if cleaned_clip:
return cleaned_clip
return None
def _summarize_hydrus_entry(
    entry: Dict[str, Any],
    prefer_service: Optional[str]
) -> Tuple[Dict[str, Any], List[str], Optional[str], Optional[str], Optional[str]]:
    """Summarise one Hydrus metadata entry.

    Returns (summary dict, primary tags, tag service key, title, clip_time).
    The summary is a shallow copy of *entry* enriched with derived fields.
    """
    services = _extract_tag_services(entry)
    # Aggregate tags across all services, first occurrence wins.
    aggregated: List[str] = []
    seen_tags: Set[str] = set()
    for record in services:
        for tag in record["tags"]:
            if tag in seen_tags:
                continue
            seen_tags.add(tag)
            aggregated.append(tag)
    service_key, primary_tags = _select_primary_tags(services, aggregated, prefer_service)
    title = _derive_title(primary_tags, aggregated, entry)
    clip_time = _derive_clip_time(primary_tags, aggregated, entry)
    summary = dict(entry)
    # Only fill in derived values where the entry did not already provide them.
    if title and not summary.get("title"):
        summary["title"] = title
    if clip_time and not summary.get("clip_time"):
        summary["clip_time"] = clip_time
    summary["tag_service_key"] = service_key
    summary["has_current_file_service"] = _has_current_file_service(entry)
    if "is_local" not in summary:
        summary["is_local"] = bool(entry.get("is_local"))
    return summary, primary_tags, service_key, title, clip_time
def _looks_like_hash(value: Any) -> bool:
if not isinstance(value, str):
return False
candidate = value.strip().lower()
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
def _collect_relationship_hashes(payload: Any, accumulator: Set[str]) -> None:
    """Recursively harvest every hash-shaped string found anywhere in *payload*."""
    if isinstance(payload, dict):
        children: Iterable[Any] = payload.values()
    elif isinstance(payload, (list, tuple, set)):
        children = payload
    else:
        # Leaf: keep it only if it looks like a Hydrus hash.
        if isinstance(payload, str) and _looks_like_hash(payload):
            accumulator.add(payload)
        return
    for child in children:
        _collect_relationship_hashes(child, accumulator)
def _generate_hydrus_url_variants(url: str) -> List[str]:
seen: Set[str] = set()
variants: List[str] = []
def push(candidate: Optional[str]) -> None:
if not candidate:
return
text = candidate.strip()
if not text or text in seen:
return
seen.add(text)
variants.append(text)
push(url)
try:
parsed = urlsplit(url)
except Exception:
return variants
if parsed.scheme in {"http", "https"}:
alternate_scheme = "https" if parsed.scheme == "http" else "http"
push(urlunsplit((alternate_scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)))
normalised_netloc = parsed.netloc.lower()
if normalised_netloc and normalised_netloc != parsed.netloc:
push(urlunsplit((parsed.scheme, normalised_netloc, parsed.path, parsed.query, parsed.fragment)))
if parsed.path:
trimmed_path = parsed.path.rstrip("/")
if trimmed_path != parsed.path:
push(urlunsplit((parsed.scheme, parsed.netloc, trimmed_path, parsed.query, parsed.fragment)))
else:
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path + "/", parsed.query, parsed.fragment)))
unquoted_path = unquote(parsed.path)
if unquoted_path != parsed.path:
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, parsed.query, parsed.fragment)))
if parsed.query or parsed.fragment:
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", "")))
if parsed.path:
unquoted_path = unquote(parsed.path)
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, "", "")))
return variants
def _build_hydrus_query(
hashes: Optional[Sequence[str]],
file_ids: Optional[Sequence[int]],
include_relationships: bool,
minimal: bool,
) -> Dict[str, str]:
query: Dict[str, str] = {}
if hashes:
query["hashes"] = json.dumps([_normalize_hash(h) for h in hashes])
if file_ids:
query["file_ids"] = json.dumps([int(fid) for fid in file_ids])
if not query:
raise ValueError("hashes or file_ids must be provided")
query["include_service_keys_to_tags"] = json.dumps(True)
query["include_tag_services"] = json.dumps(True)
query["include_file_services"] = json.dumps(True)
if include_relationships:
query["include_file_relationships"] = json.dumps(True)
if not minimal:
extras = (
"include_url",
"include_size",
"include_width",
"include_height",
"include_duration",
"include_mime",
"include_has_audio",
"include_is_trashed",
)
for key in extras:
query[key] = json.dumps(True)
return query
def _fetch_hydrus_entries(
    client: "HydrusNetwork",
    hashes: Optional[Sequence[str]],
    file_ids: Optional[Sequence[int]],
    include_relationships: bool,
    minimal: bool,
) -> List[Dict[str, Any]]:
    """Call /get_files/file_metadata and return the list of dict entries.

    Returns [] when neither hashes nor file_ids are given, or when the
    response carries no usable "metadata" list.
    """
    if not hashes and not file_ids:
        return []
    spec = HydrusRequestSpec(
        method="GET",
        endpoint="/get_files/file_metadata",
        query=_build_hydrus_query(hashes, file_ids, include_relationships, minimal),
    )
    response = client._perform_request(spec)
    # The Client API wraps results under "metadata"; tolerate malformed shapes.
    metadata = response.get("metadata") if isinstance(response, dict) else None
    if isinstance(metadata, list):
        return [entry for entry in metadata if isinstance(entry, dict)]
    return []
def _has_current_file_service(entry: Dict[str, Any]) -> bool:
services = entry.get("file_services")
if not isinstance(services, dict):
return False
current = services.get("current")
if isinstance(current, dict):
for value in current.values():
if value:
return True
return False
if isinstance(current, list):
return len(current) > 0
return False
def _compute_file_flags(entry: Dict[str, Any]) -> Tuple[bool, bool, bool]:
mime = entry.get("mime")
mime_lower = mime.lower() if isinstance(mime, str) else ""
is_video = mime_lower.startswith("video/")
is_audio = mime_lower.startswith("audio/")
is_deleted = bool(entry.get("is_trashed"))
file_services = entry.get("file_services")
if not is_deleted and isinstance(file_services, dict):
deleted = file_services.get("deleted")
if isinstance(deleted, dict) and deleted:
is_deleted = True
return is_video, is_audio, is_deleted
def fetch_hydrus_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch and summarise Hydrus metadata for one file.

    *payload* keys: "api_url" (required), "access_key", and either "hash"
    (64-char hex) or "file_id"/"file_ids"; "options" may carry
    "prefer_service_name", "include_relationships", "minimal", "timeout".

    Returns a result dict holding the summarised entry, its primary tags,
    derived title/clip_time, size/duration conveniences, and — when requested —
    relationship data.  A lookup that finds nothing returns a soft
    "not_found" payload rather than raising.

    Raises:
        ValueError: On missing api_url or missing hash/file id.
        RuntimeError: When the Hydrus API reports an error.
    """
    # --- identifier coercion -------------------------------------------------
    hash_hex = None
    raw_hash_value = payload.get("hash")
    if raw_hash_value is not None:
        hash_hex = _normalize_hash(raw_hash_value)
    file_ids: List[int] = []
    raw_file_ids = payload.get("file_ids")
    if isinstance(raw_file_ids, (list, tuple, set)):
        for value in raw_file_ids:
            try:
                file_ids.append(int(value))
            except (TypeError, ValueError):
                continue  # skip non-numeric ids rather than failing the batch
    elif raw_file_ids is not None:
        try:
            file_ids.append(int(raw_file_ids))
        except (TypeError, ValueError):
            file_ids = []
    raw_file_id = payload.get("file_id")
    if raw_file_id is not None:
        try:
            coerced = int(raw_file_id)
        except (TypeError, ValueError):
            coerced = None
        if coerced is not None and coerced not in file_ids:
            file_ids.append(coerced)
    # --- connection parameters ----------------------------------------------
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    prefer_service = options.get("prefer_service_name")
    if isinstance(prefer_service, str):
        prefer_service = prefer_service.strip()
    else:
        prefer_service = None
    include_relationships = bool(options.get("include_relationships"))
    minimal = bool(options.get("minimal"))
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    if hash_hex:
        hashes = [hash_hex]
    if not hashes and not file_ids:
        raise ValueError("Hydrus hash or file id is required")
    # --- primary metadata fetch ---------------------------------------------
    try:
        entries = _fetch_hydrus_entries(
            client,
            hashes,
            file_ids or None,
            include_relationships,
            minimal
        )
    except HydrusRequestError as exc:
        raise RuntimeError(str(exc))
    if not entries:
        # Not-found is reported as a soft error payload, not an exception.
        response: Dict[str, Any] = {
            "hash": hash_hex,
            "metadata": {},
            "tags": [],
            "warnings": [f"No Hydrus metadata for {hash_hex or file_ids}"],
            "error": "not_found",
        }
        if file_ids:
            response["file_id"] = file_ids[0]
        return response
    entry = entries[0]
    if not hash_hex:
        # Looked up by file id; adopt the hash the entry reports.
        entry_hash = entry.get("hash")
        if isinstance(entry_hash, str) and entry_hash:
            hash_hex = entry_hash
            hashes = [hash_hex]
    # --- summarisation -------------------------------------------------------
    summary, primary_tags, service_key, title, clip_time = _summarize_hydrus_entry(entry, prefer_service)
    is_video, is_audio, is_deleted = _compute_file_flags(entry)
    has_current_file_service = _has_current_file_service(entry)
    is_local = bool(entry.get("is_local"))
    size_bytes = entry.get("size") or entry.get("file_size")
    filesize_mb = None
    if isinstance(size_bytes, (int, float)) and size_bytes > 0:
        filesize_mb = float(size_bytes) / (1024.0 * 1024.0)
    duration = entry.get("duration")
    if duration is None and isinstance(entry.get("duration_ms"), (int, float)):
        duration = float(entry["duration_ms"]) / 1000.0
    warnings_list: List[str] = []
    if not primary_tags:
        warnings_list.append("No tags returned for preferred service")
    # --- optional relationship expansion -------------------------------------
    relationships = None
    relationship_metadata: Dict[str, Dict[str, Any]] = {}
    if include_relationships and hash_hex:
        try:
            rel_spec = HydrusRequestSpec(
                method="GET",
                endpoint="/manage_file_relationships/get_file_relationships",
                query={"hash": hash_hex},
            )
            relationships = client._perform_request(rel_spec)
        except HydrusRequestError as exc:
            # Relationships are a bonus: record the failure, keep the summary.
            warnings_list.append(f"Relationship lookup failed: {exc}")
            relationships = None
        if isinstance(relationships, dict):
            related_hashes: Set[str] = set()
            _collect_relationship_hashes(relationships, related_hashes)
            related_hashes.discard(hash_hex)
            if related_hashes:
                try:
                    related_entries = _fetch_hydrus_entries(
                        client,
                        sorted(related_hashes),
                        None,
                        False,
                        True
                    )
                except HydrusRequestError as exc:
                    warnings_list.append(f"Relationship metadata fetch failed: {exc}")
                else:
                    for rel_entry in related_entries:
                        rel_hash = rel_entry.get("hash")
                        if not isinstance(rel_hash, str):
                            continue
                        rel_summary, rel_tags, _, rel_title, rel_clip = _summarize_hydrus_entry(rel_entry, prefer_service)
                        rel_summary["tags"] = rel_tags
                        if rel_title:
                            rel_summary["title"] = rel_title
                        if rel_clip:
                            rel_summary["clip_time"] = rel_clip
                        relationship_metadata[rel_hash] = rel_summary
    # --- result assembly -----------------------------------------------------
    result: Dict[str, Any] = {
        "hash": entry.get("hash") or hash_hex,
        "metadata": summary,
        "tags": primary_tags,
        "tag_service_key": service_key,
        "title": title,
        "clip_time": clip_time,
        "duration": duration,
        "filesize_mb": filesize_mb,
        "is_video": is_video,
        "is_audio": is_audio,
        "is_deleted": is_deleted,
        "is_local": is_local,
        "has_current_file_service": has_current_file_service,
        "matched_hash": entry.get("hash") or hash_hex,
        "swap_recommended": False,
    }
    file_id_value = entry.get("file_id")
    if isinstance(file_id_value, (int, float)):
        result["file_id"] = int(file_id_value)
    if relationships is not None:
        result["relationships"] = relationships
    if relationship_metadata:
        result["relationship_metadata"] = relationship_metadata
    if warnings_list:
        result["warnings"] = warnings_list
    return result
def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve a source URL to a Hydrus file and fetch its metadata summary.

    Tries a breadth-first queue of URL spellings (see
    _generate_hydrus_url_variants), plus any normalised/redirect URLs the
    Client API reports along the way, until /add_urls/get_url_files yields a
    hash or file id; then delegates to fetch_hydrus_metadata and annotates the
    result with matched/normalised/tried URLs.

    Raises:
        ValueError: On missing url or api_url.
        RuntimeError: When the API errors or no variant matches.
    """
    raw_url = payload.get("url") or payload.get("source_url")
    url = str(raw_url or "").strip()
    if not url:
        raise ValueError("URL is required to fetch Hydrus metadata by URL")
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    file_ids: Optional[List[int]] = None
    matched_url = None
    normalised_reported = None
    seen: Set[str] = set()
    queue = deque()
    for variant in _generate_hydrus_url_variants(url):
        queue.append(variant)
    if not queue:
        queue.append(url)
    tried_variants: List[str] = []
    while queue:
        candidate = queue.popleft()
        candidate = str(candidate or "").strip()
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)
        tried_variants.append(candidate)
        spec = HydrusRequestSpec(
            method="GET",
            endpoint="/add_urls/get_url_files",
            query={"url": candidate},
        )
        try:
            response = client._perform_request(spec)
        except HydrusRequestError as exc:
            raise RuntimeError(str(exc))
        response_hashes_list: List[str] = []
        response_file_ids_list: List[int] = []
        if isinstance(response, dict):
            # The API may report a canonical spelling; enqueue it for retry too.
            normalised_value = response.get("normalised_url")
            if isinstance(normalised_value, str):
                trimmed = normalised_value.strip()
                if trimmed:
                    normalised_reported = normalised_reported or trimmed
                    if trimmed not in seen:
                        queue.append(trimmed)
            for redirect_key in ("redirect_url", "url"):
                redirect_value = response.get(redirect_key)
                if isinstance(redirect_value, str):
                    redirect_trimmed = redirect_value.strip()
                    if redirect_trimmed and redirect_trimmed not in seen:
                        queue.append(redirect_trimmed)
            # Direct hash/id fields on the response body.
            raw_hashes = response.get("hashes") or response.get("file_hashes")
            if isinstance(raw_hashes, list):
                for item in raw_hashes:
                    try:
                        normalized = _normalize_hash(item)
                    except ValueError:
                        continue
                    if normalized:
                        response_hashes_list.append(normalized)
            raw_ids = response.get("file_ids") or response.get("file_id")
            if isinstance(raw_ids, list):
                for item in raw_ids:
                    try:
                        response_file_ids_list.append(int(item))
                    except (TypeError, ValueError):
                        continue
            elif raw_ids is not None:
                try:
                    response_file_ids_list.append(int(raw_ids))
                except (TypeError, ValueError):
                    pass
            # Per-URL status records may also carry hashes / file ids.
            statuses = response.get("url_file_statuses")
            if isinstance(statuses, list):
                for entry in statuses:
                    if not isinstance(entry, dict):
                        continue
                    status_hash = entry.get("hash") or entry.get("file_hash")
                    if status_hash:
                        try:
                            normalized = _normalize_hash(status_hash)
                        except ValueError:
                            normalized = None
                        if normalized:
                            response_hashes_list.append(normalized)
                    status_id = entry.get("file_id") or entry.get("fileid")
                    if status_id is not None:
                        try:
                            response_file_ids_list.append(int(status_id))
                        except (TypeError, ValueError):
                            pass
        if not hashes and response_hashes_list:
            hashes = response_hashes_list
        if not file_ids and response_file_ids_list:
            file_ids = response_file_ids_list
        if hashes or file_ids:
            matched_url = candidate
            break
    if not hashes and not file_ids:
        raise RuntimeError(
            "No Hydrus matches for URL variants: "
            + ", ".join(tried_variants)
        )
    # Delegate the actual metadata fetch to the by-hash/by-id path.
    followup_payload = {
        "api_url": base_url,
        "access_key": access_key,
        "hash": hashes[0] if hashes else None,
        "file_ids": file_ids,
        "options": {"timeout": timeout, "minimal": True},
    }
    result = fetch_hydrus_metadata(followup_payload)
    result["matched_url"] = matched_url or url
    result["normalised_url"] = normalised_reported or matched_url or url
    result["tried_urls"] = tried_variants
    return result
def _build_hydrus_context(payload: Dict[str, Any]) -> Tuple["HydrusNetwork", str, str, float, Optional[str]]:
    """Resolve shared Hydrus connection parameters from *payload* and build a client.

    Returns (client, base_url, access_key, timeout, prefer_service_name).
    Raises ValueError when api_url is missing.
    """
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    opts = payload.get("options")
    if not isinstance(opts, dict):
        opts = {}
    timeout = float(opts.get("timeout") or payload.get("timeout") or 60.0)
    raw_prefer = payload.get("prefer_service_name") or opts.get("prefer_service_name")
    prefer: Optional[str] = None
    if isinstance(raw_prefer, str):
        prefer = raw_prefer.strip() or None
    client = HydrusNetwork(base_url, access_key, timeout)
    return client, base_url, access_key, timeout, prefer
def _refetch_hydrus_summary(
    base_url: str,
    access_key: str,
    hash_hex: str,
    timeout: float,
    prefer_service: Optional[str]
) -> Dict[str, Any]:
    """Re-fetch a minimal, relationship-free metadata summary for *hash_hex*."""
    options: Dict[str, Any] = {
        "minimal": True,
        "include_relationships": False,
        "timeout": timeout,
    }
    if prefer_service:
        options["prefer_service_name"] = prefer_service
    return fetch_hydrus_metadata({
        "hash": hash_hex,
        "api_url": base_url,
        "access_key": access_key,
        "options": options,
    })
def apply_hydrus_tag_mutation(
    payload: Dict[str, Any],
    add: Iterable[Any],
    remove: Iterable[Any]
) -> Dict[str, Any]:
    """Add and/or remove tags on a single Hydrus file, then return a fresh summary.

    Parameters:
        payload: Hydrus connection payload (``api_url``, ``access_key``,
            ``hash``, optional ``service_key``/``tag_service_key`` and
            ``options``).
        add: Tag values to add; each is normalised, empties are dropped.
        remove: Tag values to delete; normalised the same way.

    Returns:
        The post-mutation summary dict augmented with ``added_tags``,
        ``removed_tags`` and ``tag_service_key``.

    Raises:
        ValueError: when no effective tag changes are supplied.
        RuntimeError: when the tag service key cannot be determined, or the
            Hydrus request fails (original ``HydrusRequestError`` chained).
    """
    client, base_url, access_key, timeout, prefer_service = _build_hydrus_context(payload)
    hash_hex = _normalize_hash(payload.get("hash"))
    # Normalise up front; a falsy normalisation result drops the tag entirely.
    add_list = [_normalize_tag(tag) for tag in add if _normalize_tag(tag)]
    remove_list = [_normalize_tag(tag) for tag in remove if _normalize_tag(tag)]
    if not add_list and not remove_list:
        raise ValueError("No tag changes supplied")
    service_key = payload.get("service_key") or payload.get("tag_service_key")
    if not service_key:
        # Caller did not pin a tag service; look it up from the file summary.
        summary = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
        service_key = summary.get("tag_service_key")
    if not isinstance(service_key, str) or not service_key:
        raise RuntimeError("Unable to determine Hydrus tag service key")
    # Hydrus Client API action codes: "0" == add, "1" == delete.
    actions: Dict[str, List[str]] = {}
    if add_list:
        actions["0"] = add_list
    if remove_list:
        actions["1"] = remove_list
    request_payload = {
        "hashes": [hash_hex],
        "service_keys_to_actions_to_tags": {
            service_key: actions,
        },
    }
    try:
        tag_spec = HydrusRequestSpec(
            method="POST",
            endpoint="/add_tags/add_tags",
            data=request_payload,
        )
        client._perform_request(tag_spec)
    except HydrusRequestError as exc:
        # Surface as RuntimeError but keep the original as the cause so the
        # full Hydrus failure context is preserved in tracebacks.
        raise RuntimeError(str(exc)) from exc
    # Re-fetch so the caller sees the post-mutation state.
    summary_after = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
    result = dict(summary_after)
    result["added_tags"] = actions.get("0", [])
    result["removed_tags"] = actions.get("1", [])
    result["tag_service_key"] = summary_after.get("tag_service_key")
    return result

27
CLI.py
View File

@@ -845,21 +845,8 @@ class CmdletIntrospection:
providers.keys()
)
try:
from Provider.metadata_provider import list_metadata_providers
meta_providers = list_metadata_providers(config) or {}
meta_available = [n for n, ready in meta_providers.items() if ready]
meta_choices = (
sorted(meta_available)
if meta_available else sorted(meta_providers.keys())
)
except Exception:
meta_choices = []
merged = sorted(set(provider_choices + meta_choices))
if merged:
return merged
if provider_choices:
return provider_choices
if normalized_arg == "scrape":
try:
@@ -990,7 +977,15 @@ class CmdletCompleter(Completer):
config=config
)
if choices:
for choice in choices:
choice_list = choices
normalized_prev = prev_token.lstrip("-").strip().lower()
if normalized_prev == "provider" and current_token:
current_lower = current_token.lower()
filtered = [c for c in choices if current_lower in c.lower()]
if filtered:
choice_list = filtered
for choice in choice_list:
yield Completion(choice, start_position=-len(current_token))
# Example: if the user has typed `download-file -url ...`, then `url`
# is considered used and should not be suggested again (even as `--url`).

View File

@@ -343,7 +343,7 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
# Fast gate: only for streaming URLs yt-dlp knows about.
try:
from SYS.download import is_url_supported_by_ytdlp # noqa: WPS433
from tool.ytdlp import is_url_supported_by_ytdlp # noqa: WPS433
if not is_url_supported_by_ytdlp(url):
return {

View File

@@ -6,20 +6,22 @@ osd-bar=no
border=no
# Keep the window size stable when loading files (don't resize to match aspect).
keepaspect-window=no
# Ensure uosc texture/icon fonts are discoverable by libass.
osd-fonts-dir=~~/scripts/uosc/fonts
sub-fonts-dir=~~/scripts/uosc/
auto-window-resize=no
ontop=yes
autofit=100%
save-position-on-quit=yes
# Avoid showing embedded cover art for audio-only files.
audio-display=no
# Stretch the video to fill the window (ignore aspect ratio, may distort)
keepaspect=no
video-unscaled=no
cursor-autohide=1000
# gpu-next can be fragile on some Windows/D3D11 setups; prefer the stable VO.
vo=gpu
@@ -34,7 +36,7 @@ background=none
background-color=0/0
# Without transparency, these options may be useful:
background-color=.2 # don't use pure black
# background-color=.2 # don't use pure black (disabled to keep video background transparent)
force-window-position # recenter the window when changing playlist position on X11 and macOS
auto-window-resize=no # preserve the window size when changing playlist entry
@@ -79,11 +81,3 @@ reset-on-next-file-remove=video-zoom # preserve the zoom when changing file
reset-on-next-file-remove=panscan
reset-on-next-file-remove=video-unscaled
linear-downscaling=no # don't make black and white manga brighter
git config --global user.name "Nose"
git config --global user.email "goyimnose@nothing.blah"
ssh-keygen -t ed25519 -C "goyimnose@nothing.blah" -f $env:USERPROFILE\.ssh\id_ed25519
git remote set-url origin goyimnose@nothing.blah:OWNER/REPO.git

View File

@@ -182,7 +182,7 @@ time_precision=0
# Display stream's buffered time in timeline if it's lower than this amount of seconds, 0 to disable
buffered_time_threshold=60
# Hide UI when mpv autohides the cursor. Timing is controlled by `cursor-autohide` in `mpv.conf` (in milliseconds).
autohide=no
autohide=yes
# Can be: flash, static, manual (controlled by flash-pause-indicator and decide-pause-indicator commands)
pause_indicator=flash
# Sizes to list in stream quality menu

View File

@@ -1,12 +1,15 @@
from __future__ import annotations
import os
import random
import re
import shutil
import string
import subprocess
import time
import sys
from pathlib import Path
import subprocess
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Iterable, List, Optional, Tuple
from API.hifi import HifiApiClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log
@@ -733,6 +736,10 @@ class HIFI(Provider):
input_ref: str,
output_path: Path,
lossless_fallback: bool = True,
progress: Optional[Any] = None,
transfer_label: Optional[str] = None,
duration_seconds: Optional[int] = None,
audio_quality: Optional[str] = None,
) -> Optional[Path]:
ffmpeg_path = self._find_ffmpeg()
if not ffmpeg_path:
@@ -749,20 +756,115 @@ class HIFI(Provider):
protocol_whitelist = "file,https,http,tcp,tls,crypto,data"
def _run(cmd: List[str]) -> bool:
label = str(transfer_label or output_path.name or "hifi")
def _estimate_total_bytes() -> Optional[int]:
try:
proc = subprocess.run(
cmd,
capture_output=True,
text=True,
check=False,
dur = int(duration_seconds) if duration_seconds is not None else None
except Exception:
dur = None
if not dur or dur <= 0:
return None
qual = str(audio_quality or "").strip().lower()
# Rough per-quality bitrate guess (bytes/sec).
if qual in {"hi_res",
"hi_res_lossless",
"hires",
"hi-res",
"master",
"mqa"}:
bps = 4_608_000 # ~24-bit/96k stereo
elif qual in {"lossless",
"flac"}:
bps = 1_411_200 # 16-bit/44.1k stereo
else:
bps = 320_000 # kbps for compressed
try:
return int((bps / 8.0) * dur)
except Exception:
return None
est_total_bytes = _estimate_total_bytes()
def _update_transfer(total_bytes_val: Optional[int]) -> None:
if progress is None:
return
try:
progress.update_transfer(
label=label,
completed=int(total_bytes_val) if total_bytes_val is not None else None,
total=est_total_bytes,
)
except Exception:
pass
def _run(cmd: List[str], *, target_path: Optional[Path] = None) -> bool:
cmd_progress = list(cmd)
# Enable ffmpeg progress output for live byte updates.
cmd_progress.insert(1, "-progress")
cmd_progress.insert(2, "pipe:1")
cmd_progress.insert(3, "-nostats")
try:
proc = subprocess.Popen(
cmd_progress,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if proc.returncode == 0 and self._has_nonempty_file(output_path):
return True
if proc.stderr:
debug(f"[hifi] ffmpeg failed: {proc.stderr.strip()}")
except Exception as exc:
debug(f"[hifi] ffmpeg invocation failed: {exc}")
return False
last_bytes = None
try:
while True:
line = proc.stdout.readline() if proc.stdout else ""
if not line:
if proc.poll() is not None:
break
time.sleep(0.05)
continue
if "=" not in line:
continue
key, val = line.strip().split("=", 1)
if key == "total_size":
try:
last_bytes = int(val)
_update_transfer(last_bytes)
except Exception:
pass
elif key == "out_time_ms":
# Map out_time_ms to byte estimate when total_size missing.
try:
if est_total_bytes and val.isdigit():
ms = int(val)
dur_ms = (duration_seconds or 0) * 1000
if dur_ms > 0:
pct = min(1.0, max(0.0, ms / dur_ms))
approx = int(est_total_bytes * pct)
_update_transfer(approx)
except Exception:
pass
proc.wait()
finally:
if last_bytes is not None:
_update_transfer(last_bytes)
check_path = target_path or output_path
if proc.returncode == 0 and self._has_nonempty_file(check_path):
return True
try:
stderr_text = proc.stderr.read() if proc.stderr else ""
if stderr_text:
debug(f"[hifi] ffmpeg failed: {stderr_text.strip()}")
except Exception:
pass
return False
# Prefer remux (fast, no transcode).
@@ -816,25 +918,14 @@ class HIFI(Provider):
"flac",
str(tmp_flac_path),
]
try:
proc = subprocess.run(
cmd_flac,
capture_output=True,
text=True,
check=False,
)
if proc.returncode == 0 and self._has_nonempty_file(tmp_flac_path):
if tmp_flac_path != flac_path:
try:
tmp_flac_path.replace(flac_path)
except Exception:
# If rename fails, still return the temp file.
return tmp_flac_path
return flac_path
if proc.stderr:
debug(f"[hifi] ffmpeg flac fallback failed: {proc.stderr.strip()}")
except Exception as exc:
debug(f"[hifi] ffmpeg flac fallback invocation failed: {exc}")
if _run(cmd_flac, target_path=tmp_flac_path) and self._has_nonempty_file(tmp_flac_path):
if tmp_flac_path != flac_path:
try:
tmp_flac_path.replace(flac_path)
except Exception:
# If rename fails, still return the temp file.
return tmp_flac_path
return flac_path
return None
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
@@ -921,7 +1012,14 @@ class HIFI(Provider):
# If resolve_tidal_manifest_path returned a URL, prefer feeding it directly to ffmpeg.
if resolved_text.lower().startswith("http"):
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=resolved_text,
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
if materialized is not None:
return materialized
@@ -947,7 +1045,14 @@ class HIFI(Provider):
if source_path.is_file() and source_path.suffix.lower() == ".mpd":
# Materialize audio from the local MPD.
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=str(source_path), output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=str(source_path),
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
if materialized is not None:
return materialized
return None
@@ -965,7 +1070,14 @@ class HIFI(Provider):
# As a last resort, attempt to treat the local path as an ffmpeg input.
out_file = output_dir / f"{stem}{suffix}"
materialized = self._ffmpeg_demux_to_audio(input_ref=resolved_text, output_path=out_file)
materialized = self._ffmpeg_demux_to_audio(
input_ref=resolved_text,
output_path=out_file,
progress=self.config.get("_pipeline_progress") if isinstance(self.config, dict) else None,
transfer_label=title_part or getattr(result, "title", None),
duration_seconds=self._coerce_duration_seconds(md),
audio_quality=md.get("audioQuality") if isinstance(md, dict) else None,
)
return materialized
def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]:
@@ -1228,6 +1340,38 @@ class HIFI(Provider):
minutes, secs = divmod(total, 60)
return f"{minutes}:{secs:02d}"
@staticmethod
def _coerce_duration_seconds(value: Any) -> Optional[int]:
candidates = []
candidates.append(value)
try:
if isinstance(value, dict):
for key in ("duration",
"durationSeconds",
"duration_sec",
"duration_ms",
"durationMillis"):
if key in value:
candidates.append(value.get(key))
except Exception:
pass
for cand in candidates:
try:
if cand is None:
continue
if isinstance(cand, str) and cand.strip().endswith("ms"):
cand = cand.strip()[:-2]
v = float(cand)
if v <= 0:
continue
if v > 10_000: # treat as milliseconds
v = v / 1000.0
return int(round(v))
except Exception:
continue
return None
@staticmethod
def _stringify(value: Any) -> str:
text = str(value or "").strip()
@@ -1305,23 +1449,18 @@ class HIFI(Provider):
if audio_quality:
columns.append(("Quality", audio_quality))
tags = {"tidal"}
if audio_quality:
tags.add(f"quality:{audio_quality.lower()}")
metadata = item.get("mediaMetadata")
if isinstance(metadata, dict):
tag_values = metadata.get("tags") or []
for tag in tag_values:
if isinstance(tag, str) and tag.strip():
tags.add(tag.strip().lower())
# IMPORTANT: do not retain a shared reference to the raw API dict.
# Downstream playback (MPV) mutates metadata to cache the decoded Tidal
# manifest path/URL. If multiple results share the same dict reference,
# they can incorrectly collapse to a single playable target.
full_md: Dict[str, Any] = dict(item)
url_value = self._stringify(full_md.get("url"))
if url_value:
full_md["url"] = url_value
return SearchResult(
tags = self._build_track_tags(full_md)
result = SearchResult(
table="hifi",
title=title,
path=path,
@@ -1332,6 +1471,12 @@ class HIFI(Provider):
columns=columns,
full_metadata=full_md,
)
if url_value:
try:
result.url = url_value
except Exception:
pass
return result
def _extract_track_selection_context(
self, selected_items: List[Any]
@@ -1401,6 +1546,9 @@ class HIFI(Provider):
def _fetch_track_details(self, track_id: int) -> Optional[Dict[str, Any]]:
if track_id <= 0:
return None
info_data = self._fetch_track_info(track_id)
for base in self.api_urls:
endpoint = f"{base.rstrip('/')}/track/"
try:
@@ -1408,12 +1556,32 @@ class HIFI(Provider):
payload = client.track(track_id) if client else None
data = payload.get("data") if isinstance(payload, dict) else None
if isinstance(data, dict):
return data
merged: Dict[str, Any] = {}
if isinstance(info_data, dict):
merged.update(info_data)
merged.update(data)
return merged
except Exception as exc:
log(f"[hifi] Track lookup failed for {endpoint}: {exc}", file=sys.stderr)
continue
return None
def _fetch_track_info(self, track_id: int) -> Optional[Dict[str, Any]]:
if track_id <= 0:
return None
for base in self.api_urls:
endpoint = f"{base.rstrip('/')}/info/"
try:
client = self._get_api_client_for_base(base)
payload = client.info(track_id) if client else None
data = payload.get("data") if isinstance(payload, dict) else None
if isinstance(data, dict):
return data
except Exception as exc:
debug(f"[hifi] Info lookup failed for {endpoint}: {exc}")
continue
return None
def _fetch_track_lyrics(self, track_id: int) -> Optional[Dict[str, Any]]:
if track_id <= 0:
return None
@@ -1450,6 +1618,54 @@ class HIFI(Provider):
]
return [(name, value) for name, value in values if value]
def _build_track_tags(self, metadata: Dict[str, Any]) -> set[str]:
tags: set[str] = {"tidal"}
audio_quality = self._stringify(metadata.get("audioQuality"))
if audio_quality:
tags.add(f"quality:{audio_quality.lower()}")
media_md = metadata.get("mediaMetadata")
if isinstance(media_md, dict):
tag_values = media_md.get("tags") or []
for tag in tag_values:
if isinstance(tag, str):
candidate = tag.strip()
if candidate:
tags.add(candidate.lower())
title_text = self._stringify(metadata.get("title"))
if title_text:
tags.add(f"title:{title_text}")
artists = self._extract_artists(metadata)
for artist in artists:
artist_clean = self._stringify(artist)
if artist_clean:
tags.add(f"artist:{artist_clean}")
album_title = ""
album_obj = metadata.get("album")
if isinstance(album_obj, dict):
album_title = self._stringify(album_obj.get("title"))
else:
album_title = self._stringify(metadata.get("album"))
if album_title:
tags.add(f"album:{album_title}")
track_no_val = metadata.get("trackNumber") or metadata.get("track_number")
if track_no_val is not None:
try:
track_int = int(track_no_val)
if track_int > 0:
tags.add(f"track:{track_int}")
except Exception:
track_text = self._stringify(track_no_val)
if track_text:
tags.add(f"track:{track_text}")
return tags
def selector(
self,
selected_items: List[Any],
@@ -1476,16 +1692,32 @@ class HIFI(Provider):
else None
)
try:
debug(
f"[hifi.selector] table_type={table_type} stage_is_last={stage_is_last} selected_count={len(selected_items) if selected_items else 0}"
)
except Exception:
pass
# Artist selection: selecting @N should open an albums list.
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.artist":
contexts = self._extract_artist_selection_context(selected_items)
try:
debug(f"[hifi.selector] artist contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
artist_id, artist_name = contexts[0]
album_results = self._albums_for_artist(artist_id=artist_id, artist_name=artist_name, limit=200)
if not album_results:
return False
try:
from SYS.rich_display import stdout_console
stdout_console().print(f"[bold yellow][hifi] No albums found for {artist_name}[/]")
except Exception:
log(f"[hifi] No albums found for {artist_name}")
return True
try:
from SYS.rich_display import stdout_console
@@ -1531,6 +1763,10 @@ class HIFI(Provider):
# Album selection: selecting @N should open the track list for that album.
if isinstance(table_type, str) and table_type.strip().lower() == "hifi.album":
contexts = self._extract_album_selection_context(selected_items)
try:
debug(f"[hifi.selector] album contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
@@ -1605,6 +1841,10 @@ class HIFI(Provider):
return False
contexts = self._extract_track_selection_context(selected_items)
try:
debug(f"[hifi.selector] track contexts={len(contexts)}")
except Exception:
pass
if not contexts:
return False
@@ -1657,6 +1897,9 @@ class HIFI(Provider):
insert_pos = 2 if artist_display else 1
columns.insert(insert_pos, ("Album", album_title))
tags = self._build_track_tags(detail)
url_value = self._stringify(detail.get("url"))
result = SearchResult(
table="hifi",
title=title,
@@ -1666,7 +1909,13 @@ class HIFI(Provider):
media_kind="audio",
columns=columns,
full_metadata=detail,
tag=tags,
)
if url_value:
try:
result.url = url_value
except Exception:
pass
table.add_result(result)
try:
results_payload.append(result.to_dict())

View File

@@ -8,12 +8,11 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple
from urllib.parse import urlparse
from API.HTTP import HTTPClient
from API.HTTP import HTTPClient, _download_direct_file
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.download import _download_direct_file
from SYS.logger import log
from SYS.logger import log, debug
from SYS.models import DownloadError
_HOSTS_CACHE_TTL_SECONDS = 24 * 60 * 60
@@ -302,7 +301,7 @@ def _dispatch_alldebrid_magnet_search(
)
except Exception:
pass
log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr)
debug(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download")
def prepare_magnet(

192
Provider/hello_provider.py Normal file
View File

@@ -0,0 +1,192 @@
"""Example provider template for use as a starter kit.
This minimal provider demonstrates the typical hooks a provider may implement:
- `validate()` to assert it's usable
- `search()` to return `SearchResult` items
- `download()` to persist a sample file (useful for local tests)
See `docs/provider_guide.md` for authoring guidance.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
class HelloProvider(Provider):
    """Very small example provider suitable as a template.

    - Table name: `hello`
    - Usage: `search-file -provider hello "query"`
    - Selecting a row and piping into `download-file` will call `download()`.
    """

    # URL scheme prefixes this provider claims; it matches no real domains.
    URL = ("hello:",)
    URL_DOMAINS = ()

    def validate(self) -> bool:
        # No configuration required; always available for testing/demo purposes.
        return True

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Return up to three synthetic results for *query*, capped by *limit*."""
        q = (query or "").strip()
        results: List[SearchResult] = []
        # Wildcard-style queries fall back to a fixed demo term.
        if not q or q in {"*", "all", "list"}:
            q = "example"
        # Emit up to `limit` tiny example results.
        n = min(max(1, int(limit)), 3)
        for i in range(1, n + 1):
            title = f"{q} sample {i}"
            path = f"https://example.org/{q}/{i}"
            sr = SearchResult(
                table="hello",
                title=title,
                path=path,
                detail="Example provider result",
                media_kind="file",
                columns=[("Example", "yes")],
                full_metadata={"example_index": i},
            )
            results.append(sr)
        # Honour limit even when smaller than the number generated (e.g. 0).
        return results[: max(0, int(limit))]

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
        """Create a small text file to simulate a download.

        This keeps the example self-contained (no network access required) and
        makes it straightforward to test provider behavior with `pytest`.
        """
        try:
            Path(output_dir).mkdir(parents=True, exist_ok=True)
        except Exception:
            pass  # best-effort; the write below surfaces real failures
        title = str(getattr(result, "title", "hello") or "hello").strip()
        # Keep only filesystem-safe characters for the output filename.
        safe = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in title)
        fname = f"{safe}.txt" if safe else "hello.txt"
        dest = Path(output_dir) / fname
        try:
            dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
            return dest
        except Exception:
            return None

    def selector(
        self,
        selected_items: List[Any],
        *,
        ctx: Any,
        stage_is_last: bool = True,
        **_kwargs: Any,
    ) -> bool:
        """Present a simple details table when a HelloProvider row is selected.

        This demonstrates how providers can implement custom `@N` selection
        behavior by constructing a `ResultTable`, populating it with
        provider-specific rows, and instructing the CLI to show the table.
        """
        # Only take over selection handling on the final pipeline stage.
        if not stage_is_last:
            return False

        def _as_payload(item: Any) -> Dict[str, Any]:
            # Coerce a selected item (dict, SearchResult-like, or arbitrary
            # object) into a plain dict so the fields below read uniformly.
            if isinstance(item, dict):
                return dict(item)
            try:
                if hasattr(item, "to_dict"):
                    maybe = item.to_dict()
                    if isinstance(maybe, dict):
                        return maybe
            except Exception:
                pass
            payload: Dict[str, Any] = {}
            try:
                payload = {
                    "title": getattr(item, "title", None),
                    "path": getattr(item, "path", None),
                    "table": getattr(item, "table", None),
                    "annotations": getattr(item, "annotations", None),
                    "media_kind": getattr(item, "media_kind", None),
                    "full_metadata": getattr(item, "full_metadata", None),
                }
            except Exception:
                payload = {}
            return payload

        chosen: List[Dict[str, Any]] = []
        for item in selected_items or []:
            payload = _as_payload(item)
            meta = payload.get("full_metadata") or {}
            if not isinstance(meta, dict):
                meta = {}
            idx = meta.get("example_index")
            # Only rows this provider produced carry `example_index`.
            if idx is None:
                continue
            title = str(payload.get("title") or payload.get("path") or "").strip() or f"hello-{idx}"
            chosen.append({"index": idx, "title": title, "path": payload.get("path")})
        if not chosen:
            return False
        # Only the first selected row is expanded into a details view.
        target = chosen[0]
        idx = target.get("index")
        title = target.get("title") or f"hello-{idx}"
        try:
            from SYS.result_table import ResultTable
            from SYS.rich_display import stdout_console
        except Exception:
            # If ResultTable isn't available, consider selection handled
            return True
        table = ResultTable(f"Hello Details: {title}").set_preserve_order(True)
        table.set_table("hello")
        try:
            table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
        except Exception:
            pass
        table.set_source_command("download-file", [])
        results_payload: List[Dict[str, Any]] = []
        # Fabricate two child rows ("part a"/"part b") for the selected item.
        for part in ("a", "b"):
            file_title = f"{title} - part {part}"
            file_path = f"{target.get('path')}/{part}"
            sr = SearchResult(
                table="hello",
                title=file_title,
                path=file_path,
                detail=f"Part {part}",
                media_kind="file",
                columns=[("Part", part)],
                full_metadata={"part": part, "example_index": idx},
            )
            table.add_result(sr)
            try:
                results_payload.append(sr.to_dict())
            except Exception:
                results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})
        try:
            ctx.set_last_result_table(table, results_payload)
            ctx.set_current_stage_table(table)
        except Exception:
            pass
        try:
            stdout_console().print()
            stdout_console().print(table)
        except Exception:
            pass
        return True

View File

@@ -1224,6 +1224,9 @@ class LibgenSearch:
if results:
_call(log_info, f"[libgen] Using mirror: {mirror}")
return results
else:
_call(log_info, f"[libgen] Mirror returned 0 results; stopping mirror fallback")
break
except requests.exceptions.Timeout:
_call(log_info, f"[libgen] Mirror timed out: {mirror}")
continue

View File

@@ -304,7 +304,7 @@ class PodcastIndex(Provider):
pass
try:
from SYS.download import _download_direct_file
from API.HTTP import _download_direct_file
except Exception:
return True

442
Provider/torrent.py Normal file
View File

@@ -0,0 +1,442 @@
from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import requests
from ProviderCore.base import Provider, SearchResult
from SYS.logger import debug, log
try: # Preferred HTML parser
from lxml import html as lxml_html
except Exception: # pragma: no cover - optional
lxml_html = None # type: ignore
logger = logging.getLogger(__name__)
@dataclass
class TorrentInfo:
    """Normalised torrent search hit shared by all scrapers."""

    name: str
    url: str  # detail-page URL (for YTS: the numeric movie id as a string)
    seeders: int
    leechers: int
    size: str  # human-readable size text as reported by the source
    source: str  # scraper name, e.g. "nyaa.si"
    category: Optional[str] = None
    uploader: Optional[str] = None
    magnet: Optional[str] = None  # magnet URI when the source exposes one
@dataclass
class SearchParams:
    """Query parameters passed to ``Scraper.find``."""

    name: str
    category: Optional[str] = None
    order_column: Optional[str] = None  # source-specific sort column identifier
    order_ascending: bool = False


# Matches strings beginning with "magnet", case-insensitively.
# NOTE(review): not referenced in this view — confirm it is used elsewhere.
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
class Scraper:
    """Base class for site-specific torrent scrapers.

    Subclasses override ``_request_data`` and ``_parse_search`` (and may
    override ``_parse_detail_response``); ``find`` drives paging and keeps
    per-page failures from aborting the whole search.
    """

    def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
        self.name = name
        self.base = base_url.rstrip("/")
        self.timeout = timeout
        # Desktop-browser UA so HTML endpoints serve their normal markup.
        ua = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
        )
        self.headers = {"User-Agent": ua}
        self.params: Optional[SearchParams] = None

    def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
        """Run a search across up to ``pages`` pages; failed pages are logged and skipped."""
        self.params = params
        collected: List[TorrentInfo] = []
        last_page = max(1, pages)
        for page_no in range(1, last_page + 1):
            try:
                collected.extend(self._get_page(page_no))
            except Exception as exc:
                debug(f"[{self.name}] page fetch failed: {exc}")
        return collected

    def _get_page(self, page: int) -> List[TorrentInfo]:
        """Fetch and parse one result page; returns [] on any request/parse error."""
        url, payload = self._request_data(page)
        try:
            resp = requests.get(
                url,
                params=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
            resp.raise_for_status()
            return self._parse_search(resp)
        except Exception as exc:
            debug(f"[{self.name}] request failed: {exc}")
            return []

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return ``(url, query_params)`` for ``page``; default is the bare base URL."""
        return self.base, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:  # pragma: no cover - interface
        raise NotImplementedError

    def _parse_detail(self, url: str) -> Optional[str]:
        """Fetch a detail page and delegate to ``_parse_detail_response``; None on failure."""
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            resp.raise_for_status()
            return self._parse_detail_response(resp)
        except Exception:
            return None

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:  # pragma: no cover - interface
        return None

    @staticmethod
    def _int_from_text(value: Any) -> int:
        """Best-effort integer parse ("1,234" -> 1234); returns 0 on failure."""
        try:
            cleaned = str(value).strip().replace(",", "")
            return int(cleaned)
        except Exception:
            return 0
class NyaaScraper(Scraper):
    """Scraper for nyaa.si HTML search results."""

    def __init__(self) -> None:
        super().__init__("nyaa.si", "https://nyaa.si")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        # Nyaa query string: p=page, q=terms, c=category ("0_0" = all), f=filter.
        payload = {
            "p": page,
            "q": params.name,
            "c": params.category or "0_0",
            "f": "0",
        }
        if params.order_column:
            payload["s"] = params.order_column
            payload["o"] = "asc" if params.order_ascending else "desc"
        return f"{self.base}/", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        # lxml is optional; without it HTML results cannot be parsed.
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 7:
                continue
            # Positional columns; the 5th cell is skipped (presumably the date).
            category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
            name_links = name_cell.xpath("./a")
            # When a comments anchor precedes the title, the title is the second anchor.
            name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
            if name_tag is None:
                continue
            name = name_tag.get("title") or (name_tag.text_content() or "").strip()
            url = name_tag.get("href") or ""
            magnet_link = None
            magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
            if magnet_candidates:
                magnet_link = magnet_candidates[0]
            category_title = None
            cat_titles = category_cell.xpath(".//a/@title")
            if cat_titles:
                category_title = cat_titles[0]
            results.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}{url}",
                    seeders=self._int_from_text(seed_cell.text_content()),
                    leechers=self._int_from_text(leech_cell.text_content()),
                    size=(size_cell.text_content() or "").strip(),
                    source=self.name,
                    category=category_title,
                    magnet=magnet_link,
                )
            )
        return results
class X1337Scraper(Scraper):
    """Scraper for 1337x.to HTML search results."""

    def __init__(self) -> None:
        super().__init__("1337x.to", "https://1337x.to")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Build the 1337x path-encoded search URL (no query parameters)."""
        params = self.params or SearchParams(name="")
        order = None
        if params.order_column:
            direction = "asc" if params.order_ascending else "desc"
            order = f"{params.order_column}/{direction}"
        category = params.category
        name = requests.utils.quote(params.name)
        # 1337x encodes search mode, category and sort order in the URL path.
        if order and category:
            path = f"/sort-category-search/{name}/{category}/{order}"
        elif category:
            path = f"/category-search/{name}/{category}"
        elif order:
            path = f"/sort-search/{name}/{order}"
        else:
            path = f"/search/{name}"
        url = f"{self.base}{path}/{page}/"
        return url, {}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        if lxml_html is None:
            return []
        doc = lxml_html.fromstring(response.text)
        rows = doc.xpath("//table//tbody/tr")
        results: List[TorrentInfo] = []
        for row in rows:
            cells = row.xpath("./td")
            if len(cells) < 6:
                continue
            # BUGFIX: tolerate rows with more than six cells. The guard above
            # allows len(cells) > 6, but the previous strict 6-way unpack
            # raised ValueError on such rows, aborting the entire page parse.
            name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell, *_ = cells
            links = name_cell.xpath(".//a")
            if len(links) < 2:
                continue
            torrent_path = links[1].get("href")
            torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
            info = TorrentInfo(
                name=(links[1].text_content() or "").strip(),
                url=torrent_url,
                seeders=self._int_from_text(seeds_cell.text_content()),
                leechers=self._int_from_text(leech_cell.text_content()),
                size=(size_cell.text_content() or "").strip().replace(",", ""),
                source=self.name,
                uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
            )
            # The listing page carries no magnet links, so each row costs one
            # extra detail-page request to recover the magnet URI.
            if not info.magnet:
                info.magnet = self._parse_detail(info.url)
            results.append(info)
        return results

    def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
        """Extract the first magnet link from a torrent detail page."""
        if lxml_html is None:
            return None
        doc = lxml_html.fromstring(response.text)
        links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
        return links[0] if links else None
class YTSScraper(Scraper):
    """Scraper for the yts.mx JSON API (movies only)."""

    # Tracker list for generated magnets; the join yields "t1&tr=t2&tr=t3",
    # which slots after the single "&tr=" written in _build_magnet.
    TRACKERS = "&tr=".join(
        [
            "udp://open.demonii.com:1337/announce",
            "udp://tracker.opentrackr.org:1337/announce",
            "udp://tracker.leechers-paradise.org:6969",
        ]
    )

    def __init__(self) -> None:
        super().__init__("yts.mx", "https://yts.mx/api/v2")
        # JSON API endpoint: drop the browser User-Agent set by the base class.
        self.headers = {}

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        params = self.params or SearchParams(name="")
        # NOTE(review): sort_by is fixed to "seeds"; params.order_column is
        # ignored here — confirm that is intentional.
        payload = {
            "limit": 50,
            "page": page,
            "query_term": params.name,
            "sort_by": "seeds",
            "order_by": "desc" if not params.order_ascending else "asc",
        }
        return f"{self.base}/list_movies.json", payload

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        results: List[TorrentInfo] = []
        data = response.json()
        if data.get("status") != "ok":
            return results
        movies = (data.get("data") or {}).get("movies") or []
        for movie in movies:
            torrents = movie.get("torrents") or []
            if not torrents:
                continue
            # Pick the best-seeded torrent variant offered for the movie.
            tor = max(torrents, key=lambda t: t.get("seeds", 0))
            name = movie.get("title") or "unknown"
            info = TorrentInfo(
                name=name,
                url=str(movie.get("id") or ""),  # numeric movie id, not a page URL
                seeders=int(tor.get("seeds", 0) or 0),
                leechers=int(tor.get("peers", 0) or 0),
                size=str(tor.get("size") or ""),
                source=self.name,
                category=(movie.get("genres") or [None])[0],
                magnet=self._build_magnet(tor, name),
            )
            results.append(info)
        return results

    def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
        # Shape: magnet:?xt=urn:btih:<hash>&dn=<name>&tr=<tracker>[&tr=...]
        return (
            f"magnet:?xt=urn:btih:{torrent.get('hash')}"
            f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
        )
class ApiBayScraper(Scraper):
    """Scraper for apibay.org (The Pirate Bay API clone)."""

    def __init__(self) -> None:
        super().__init__("apibay.org", "https://apibay.org")

    def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
        """Return the ``q.php`` endpoint and payload; the API is single-page."""
        _ = page  # single-page API
        search = self.params or SearchParams(name="")
        return f"{self.base}/q.php", {"q": search.name}

    def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
        """Parse the JSON result list, silently skipping malformed entries."""
        entries: List[TorrentInfo] = []
        try:
            data = response.json()
        except Exception:
            return entries
        if not isinstance(data, list):
            return entries
        for item in data:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            info_hash = str(item.get("info_hash") or "").strip()
            if not name or not info_hash:
                continue
            entries.append(
                TorrentInfo(
                    name=name,
                    url=f"{self.base}/description.php?id={item.get('id')}",
                    seeders=self._int_from_text(item.get("seeders")),
                    leechers=self._int_from_text(item.get("leechers")),
                    size=self._format_size(str(item.get("size") or "").strip()),
                    source=self.name,
                    category=str(item.get("category") or ""),
                    uploader=str(item.get("username") or ""),
                    magnet=self._build_magnet(info_hash, name),
                )
            )
        return entries

    @staticmethod
    def _build_magnet(info_hash: str, name: str) -> str:
        """Build a tracker-less magnet URI from an info hash and display name."""
        return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"

    @staticmethod
    def _format_size(size_raw: str) -> str:
        """Render a byte count as 'X.X GB'/'X.X MB'; pass non-numeric input through."""
        try:
            size_int = int(size_raw)
        except Exception:
            return size_raw
        if size_int <= 0:
            return size_raw
        gb = size_int / (1024 ** 3)
        if gb >= 1:
            return f"{gb:.1f} GB"
        return f"{size_int / (1024 ** 2):.1f} MB"
class Torrent(Provider):
    """Meta-provider that fans a query out to every available torrent scraper."""

    TABLE_AUTO_STAGES = {"torrent": ["download-file"]}

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.scrapers: List[Scraper] = []
        # JSON-API scrapers work without lxml.
        self.scrapers.append(ApiBayScraper())
        self.scrapers.append(YTSScraper())
        # HTML scrapers need lxml for parsing.
        if lxml_html is not None:
            self.scrapers.append(NyaaScraper())
            self.scrapers.append(X1337Scraper())
        else:
            log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)

    def validate(self) -> bool:
        """Usable as long as at least one scraper is registered."""
        return bool(self.scrapers)

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        """Query every scraper, merge results, rank by seeders, and cap at *limit*."""
        text = str(query or "").strip()
        if not text:
            return []
        params = SearchParams(name=text, order_column="seeders", order_ascending=False)
        found: List[TorrentInfo] = []
        for scraper in self.scrapers:
            try:
                found.extend(scraper.find(params, pages=1))
            except Exception as exc:
                # One broken scraper must not take down the whole search.
                debug(f"[torrent] scraper {scraper.name} failed: {exc}")
                continue
        found.sort(key=lambda r: r.seeders, reverse=True)
        if limit and limit > 0:
            found = found[:limit]
        return [self._to_search_result(item) for item in found]

    @staticmethod
    def _to_search_result(item: TorrentInfo) -> SearchResult:
        """Map one TorrentInfo onto the pipeline's SearchResult shape."""
        columns = [
            ("TITLE", item.name),
            ("Seeds", str(item.seeders)),
            ("Leechers", str(item.leechers)),
            ("Size", item.size or ""),
            ("Source", item.source),
        ]
        md = {
            "magnet": item.magnet,
            "url": item.url,
            "source": item.source,
            "seeders": item.seeders,
            "leechers": item.leechers,
            "size": item.size,
        }
        if item.uploader:
            columns.append(("Uploader", item.uploader))
            md["uploader"] = item.uploader
        return SearchResult(
            table="torrent",
            title=item.name,
            # Prefer the magnet when present; fall back to the detail URL.
            path=item.magnet or item.url,
            detail=f"Seeds:{item.seeders} | Size:{item.size}",
            annotations=[item.source],
            media_kind="other",
            columns=columns,
            full_metadata=md,
            tag={"torrent"},
        )

185
Provider/vimm.py Normal file
View File

@@ -0,0 +1,185 @@
"""Vimm provider skeleton (lxml + HTTPClient).
This is a lightweight, resilient provider implementation intended as a
starting point for implementing a full Vimm (vimm.net) provider.
It prefers server-rendered HTML parsing via lxml and uses the repo's
`HTTPClient` helper for robust HTTP calls (timeouts/retries).
Selectors in `search()` are intentionally permissive heuristics; update the
XPaths to match the real site HTML when you have an actual fixture.
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin, quote_plus
from lxml import html as lxml_html
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
class Vimm(Provider):
    """Provider for vimm.net vault listings (skeleton).

    - Uses lxml for parsing
    - No authentication required

    Selectors below are permissive heuristics; the XPath fallback chain is
    ordered from most to least specific and should be tightened against a
    real page fixture.
    """

    # Domains/URLs consulted by the registry's URL-to-provider matcher.
    URL = ("https://vimm.net/vault/",)
    URL_DOMAINS = ("vimm.net",)

    def validate(self) -> bool:
        # This provider has no required config; consider more checks if needed.
        return True

    def _parse_size_bytes(self, size_str: str) -> Optional[int]:
        """Parse a human size string (e.g. '1.4 GB') into bytes; None when unparseable."""
        if not size_str:
            return None
        try:
            s = str(size_str or "").strip().replace(",", "")
            # Leading number plus optional unit; case-insensitive.
            m = re.search(r"(?P<val>[\d\.]+)\s*(?P<unit>[KMGT]?B)?", s, flags=re.I)
            if not m:
                return None
            val = float(m.group("val"))
            unit = (m.group("unit") or "B").upper()
            # Binary multipliers; unknown units fall back to bytes.
            mul = {
                "B": 1,
                "KB": 1024,
                "MB": 1024 ** 2,
                "GB": 1024 ** 3,
                "TB": 1024 ** 4,
            }.get(unit, 1)
            return int(val * mul)
        except Exception:
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search the vault listing page and return heuristic SearchResults.

        Returns [] on empty query, fetch failure, or parse failure; per-node
        extraction errors are swallowed so one bad row cannot abort the page.
        """
        q = (query or "").strip()
        if not q:
            return []
        # Build search/list URL
        base = "https://vimm.net/vault/"
        url = f"{base}?p=list&q={quote_plus(q)}"
        try:
            with HTTPClient(timeout=20.0) as client:
                resp = client.get(url)
                content = resp.content
        except Exception as exc:
            log(f"[vimm] HTTP fetch failed: {exc}", file=sys.stderr)
            return []
        try:
            doc = lxml_html.fromstring(content)
        except Exception as exc:
            log(f"[vimm] HTML parse failed: {exc}", file=sys.stderr)
            return []
        results: List[SearchResult] = []
        # Candidate XPaths for list items (tweak to match real DOM).
        # First expression that yields nodes wins — order matters.
        container_xpaths = [
            '//div[contains(@class,"list-item")]',
            '//div[contains(@class,"result")]',
            '//li[contains(@class,"item")]',
            '//tr[contains(@class,"result")]',
            '//article',
        ]
        nodes = []
        for xp in container_xpaths:
            try:
                found = doc.xpath(xp)
                if found:
                    nodes = found
                    debug(f"[vimm] using xpath {xp} -> {len(found)} nodes")
                    break
            except Exception:
                continue
        # Fallback: try generic anchors under a list area
        if not nodes:
            try:
                nodes = doc.xpath('//div[contains(@id,"list")]/div') or doc.xpath('//div[contains(@class,"results")]/div')
            except Exception:
                nodes = []
        # NOTE(review): loop slice uses max(1, limit) while the final return
        # slices with max(0, limit) — with limit=0 one node is parsed and
        # then discarded. Harmless, but confirm intent.
        for n in (nodes or [])[: max(1, int(limit))]:
            try:
                # Prefer explicit title anchors
                title = None
                href = None
                try:
                    # a few heuristic searches for a meaningful anchor,
                    # ordered most-specific first
                    a = (n.xpath('.//a[contains(@class,"title")]') or
                         n.xpath('.//h2/a') or
                         n.xpath('.//a[contains(@href,"/vault/")]') or
                         n.xpath('.//a'))
                    if a:
                        a0 = a[0]
                        title = a0.text_content().strip()
                        href = a0.get('href')
                except Exception:
                    title = None
                    href = None
                if not title:
                    # Last resort: the node's whole text content.
                    title = (n.text_content() or "").strip()
                path = urljoin(base, href) if href else ""
                # Extract size & platform heuristics
                size_text = ""
                try:
                    s = n.xpath('.//*[contains(@class,"size")]/text()') or n.xpath('.//span[contains(text(),"MB") or contains(text(),"GB")]/text()')
                    if s:
                        size_text = str(s[0]).strip()
                except Exception:
                    size_text = ""
                size_bytes = self._parse_size_bytes(size_text)
                platform = ""
                try:
                    p = n.xpath('.//*[contains(@class,"platform")]/text()')
                    if p:
                        platform = str(p[0]).strip()
                except Exception:
                    platform = ""
                columns = []
                if platform:
                    columns.append(("Platform", platform))
                if size_text:
                    columns.append(("Size", size_text))
                results.append(
                    SearchResult(
                        table="vimm",
                        title=str(title or "").strip(),
                        path=str(path or ""),
                        detail="",
                        annotations=[],
                        media_kind="file",
                        size_bytes=size_bytes,
                        tag={"vimm"},
                        columns=columns,
                        # Keep the raw node HTML for debugging selector drift.
                        full_metadata={"raw": lxml_html.tostring(n, encoding="unicode")},
                    )
                )
            except Exception:
                continue
        return results[: max(0, int(limit))]

View File

@@ -26,8 +26,7 @@ class SearchResult:
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for pipeline processing."""
return {
out = {
"table": self.table,
"title": self.title,
"path": self.path,
@@ -40,6 +39,15 @@ class SearchResult:
"full_metadata": self.full_metadata,
}
try:
url_value = getattr(self, "url", None)
if url_value is not None:
out["url"] = url_value
except Exception:
pass
return out
class Provider(ABC):
"""Unified provider base class.

View File

@@ -1,75 +1,238 @@
"""Provider registry.
Concrete provider implementations live in the `Provider/` package.
This module is the single source of truth for provider discovery.
Concrete provider implementations live in the ``Provider`` package. This module
is the single source of truth for discovery, metadata, and lifecycle helpers
for those plugins.
"""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Type
import importlib
import pkgutil
import sys
from dataclasses import dataclass, field
from types import ModuleType
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
from urllib.parse import urlparse
from SYS.logger import log
from ProviderCore.base import Provider, SearchProvider, FileProvider, SearchResult
from Provider.alldebrid import AllDebrid
from Provider.bandcamp import Bandcamp
from Provider.libgen import Libgen
from Provider.matrix import Matrix
from Provider.openlibrary import OpenLibrary
from Provider.soulseek import Soulseek, download_soulseek_file
from Provider.telegram import Telegram
from Provider.youtube import YouTube
from Provider.fileio import FileIO
from Provider.zeroxzero import ZeroXZero
from Provider.loc import LOC
from Provider.internetarchive import InternetArchive
from Provider.podcastindex import PodcastIndex
from Provider.HIFI import HIFI
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
from Provider.soulseek import download_soulseek_file
_PROVIDERS: Dict[str,
Type[Provider]] = {
# Search-capable providers
"alldebrid": AllDebrid,
"libgen": Libgen,
"openlibrary": OpenLibrary,
"internetarchive": InternetArchive,
"hifi": HIFI,
"soulseek": Soulseek,
"bandcamp": Bandcamp,
"youtube": YouTube,
"telegram": Telegram,
"loc": LOC,
"podcastindex": PodcastIndex,
# Upload-capable providers
"0x0": ZeroXZero,
"file.io": FileIO,
"matrix": Matrix,
}
@dataclass(frozen=True)
class ProviderInfo:
    """Metadata about a single provider entry."""

    # Lower-cased primary lookup name for this provider.
    canonical_name: str
    # Concrete Provider subclass this entry resolves to.
    provider_class: Type[Provider]
    # Dotted module path the class was discovered in (may be empty).
    module: str
    # Normalized alternative lookup names (already lower-cased).
    alias_names: Tuple[str, ...] = field(default_factory=tuple)

    @property
    def supports_search(self) -> bool:
        # A provider supports search iff it overrides Provider.search.
        return self.provider_class.search is not Provider.search

    @property
    def supports_upload(self) -> bool:
        # Likewise, upload support means Provider.upload is overridden.
        return self.provider_class.upload is not Provider.upload
class ProviderRegistry:
    """Handles discovery, registration, and lookup of provider classes."""

    def __init__(self, package_name: str) -> None:
        # Package to scan for provider modules (e.g. "Provider").
        self.package_name = (package_name or "").strip()
        # canonical name -> ProviderInfo (one entry per provider).
        self._infos: Dict[str, ProviderInfo] = {}
        # canonical name AND every alias -> ProviderInfo.
        self._lookup: Dict[str, ProviderInfo] = {}
        # Module names already scanned, to keep discovery idempotent.
        self._modules: set[str] = set()
        self._discovered = False

    def _normalize(self, value: Any) -> str:
        """Lower-cased, stripped lookup key for any name-like value."""
        return str(value or "").strip().lower()

    def _candidate_names(self,
                         provider_class: Type[Provider],
                         override_name: Optional[str]) -> List[str]:
        """Collect name candidates in priority order; first becomes canonical."""
        names: List[str] = []
        seen: set[str] = set()

        def _add(value: Any) -> None:
            # De-duplicate case-insensitively while preserving original text.
            text = str(value or "").strip()
            normalized = text.lower()
            if not text or normalized in seen:
                return
            seen.add(normalized)
            names.append(text)

        # An explicit override wins over any class-declared names.
        if override_name:
            _add(override_name)
        else:
            _add(getattr(provider_class, "PROVIDER_NAME", None))
            _add(getattr(provider_class, "NAME", None))
            _add(getattr(provider_class, "__name__", None))
        for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
            _add(alias)
        return names

    def register(
        self,
        provider_class: Type[Provider],
        *,
        override_name: Optional[str] = None,
        extra_aliases: Optional[Sequence[str]] = None,
        module_name: Optional[str] = None,
        replace: bool = False,
    ) -> ProviderInfo:
        """Register a provider class with canonical and alias names.

        Returns the existing entry unchanged when the canonical name is
        already registered and ``replace`` is False (new aliases from this
        call are dropped in that case).
        """
        candidates = self._candidate_names(provider_class, override_name)
        if not candidates:
            raise ValueError("provider name candidates are required")
        canonical = self._normalize(candidates[0])
        if not canonical:
            raise ValueError("provider name must not be empty")
        alias_names: List[str] = []
        alias_seen: set[str] = set()
        for candidate in candidates[1:]:
            normalized = self._normalize(candidate)
            if not normalized or normalized == canonical or normalized in alias_seen:
                continue
            alias_seen.add(normalized)
            alias_names.append(normalized)
        for alias in extra_aliases or ():
            normalized = self._normalize(alias)
            if not normalized or normalized == canonical or normalized in alias_seen:
                continue
            alias_seen.add(normalized)
            alias_names.append(normalized)
        info = ProviderInfo(
            canonical_name=canonical,
            provider_class=provider_class,
            module=module_name or getattr(provider_class, "__module__", "") or "",
            alias_names=tuple(alias_names),
        )
        existing = self._infos.get(canonical)
        if existing is not None and not replace:
            return existing
        self._infos[canonical] = info
        # Point every lookup key (canonical + aliases) at the same entry.
        for lookup in (canonical,) + tuple(alias_names):
            self._lookup[lookup] = info
        return info

    def _register_module(self, module: ModuleType) -> None:
        """Register every Provider subclass defined directly in *module*."""
        module_name = getattr(module, "__name__", "")
        if not module_name or module_name in self._modules:
            return
        self._modules.add(module_name)
        for attr in dir(module):
            candidate = getattr(module, attr)
            if not isinstance(candidate, type):
                continue
            if not issubclass(candidate, Provider):
                continue
            # Skip the abstract bases themselves.
            if candidate in {Provider, SearchProvider, FileProvider}:
                continue
            # Only classes defined in this module, not re-imports.
            if getattr(candidate, "__module__", "") != module_name:
                continue
            try:
                self.register(candidate, module_name=module_name)
            except Exception as exc:
                log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)

    def discover(self) -> None:
        """Import and register providers from the package.

        Runs at most once; a failed import of one submodule is logged and
        skipped without aborting discovery of the rest.
        """
        if self._discovered or not self.package_name:
            return
        self._discovered = True
        try:
            package = importlib.import_module(self.package_name)
        except Exception as exc:
            log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
            return
        self._register_module(package)
        package_path = getattr(package, "__path__", None)
        if not package_path:
            return
        for finder, module_name, _ in pkgutil.iter_modules(package_path):
            # Private modules (leading underscore) are not providers.
            if module_name.startswith("_"):
                continue
            module_path = f"{self.package_name}.{module_name}"
            try:
                module = importlib.import_module(module_path)
            except Exception as exc:
                log(f"[provider] Failed to load {module_path}: {exc}", file=sys.stderr)
                continue
            self._register_module(module)

    def get(self, name: str) -> Optional[ProviderInfo]:
        """Look up a provider by canonical name or alias (case-insensitive)."""
        self.discover()
        if not name:
            return None
        return self._lookup.get(self._normalize(name))

    def iter_providers(self) -> Iterable[ProviderInfo]:
        """Return one ProviderInfo per canonical provider (as a snapshot tuple)."""
        self.discover()
        return tuple(self._infos.values())

    def has_name(self, name: str) -> bool:
        """True when *name* resolves to a registered provider."""
        return self.get(name) is not None
REGISTRY = ProviderRegistry("Provider")
REGISTRY.discover()
def register_provider(
    provider_class: Type[Provider],
    *,
    name: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    module_name: Optional[str] = None,
    replace: bool = False,
) -> ProviderInfo:
    """Register a provider class from tests or third-party packages.

    Thin wrapper over the module-level ``REGISTRY`` so callers do not need
    to know the registry's internal keyword names.
    """
    info = REGISTRY.register(
        provider_class,
        override_name=name,
        extra_aliases=aliases,
        module_name=module_name,
        replace=replace,
    )
    return info
def get_provider_class(name: str) -> Optional[Type[Provider]]:
"""Return the provider class for a registered provider name, if any."""
key = str(name or "").strip().lower()
return _PROVIDERS.get(key)
info = REGISTRY.get(name)
if info is None:
return None
return info.provider_class
def selection_auto_stage_for_table(
table_type: str,
stage_args: Optional[Sequence[str]] = None,
) -> Optional[list[str]]:
"""Return the provider-suggested stage to auto-run for a selected table.
This is used by the CLI to avoid hardcoding table names and behaviors.
"""
t = str(table_type or "").strip().lower()
if not t:
return None
# Provider tables are usually either:
# - "youtube" (no dot)
# - "hifi.tracks" (prefix = provider name)
provider_key = t.split(".", 1)[0] if "." in t else t
provider_class = get_provider_class(provider_key) or get_provider_class(t)
if provider_class is None:
@@ -82,14 +245,7 @@ def selection_auto_stage_for_table(
def is_known_provider_name(name: str) -> bool:
"""Return True if `name` matches a registered provider key.
This is intentionally cheap (no imports/instantiation) so callers can
probe UI strings (table names, store names, etc.) without triggering
noisy 'Unknown provider' logs.
"""
return (name or "").strip().lower() in _PROVIDERS
return REGISTRY.has_name(name)
def _supports_search(provider: Provider) -> bool:
@@ -107,18 +263,14 @@ def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
return []
def get_provider(name: str,
config: Optional[Dict[str,
Any]] = None) -> Optional[Provider]:
"""Get a provider by name (unified registry)."""
provider_class = _PROVIDERS.get((name or "").lower())
if provider_class is None:
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
info = REGISTRY.get(name)
if info is None:
log(f"[provider] Unknown provider: {name}", file=sys.stderr)
return None
try:
provider = provider_class(config)
provider = info.provider_class(config)
if not provider.validate():
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
return None
@@ -129,24 +281,18 @@ def get_provider(name: str,
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
"""List all providers and their availability."""
availability: Dict[str,
bool] = {}
for name, provider_class in _PROVIDERS.items():
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = provider_class(config)
availability[name] = provider.validate()
provider = info.provider_class(config)
availability[info.canonical_name] = provider.validate()
except Exception:
availability[name] = False
availability[info.canonical_name] = False
return availability
def get_search_provider(name: str,
config: Optional[Dict[str,
Any]] = None) -> Optional[SearchProvider]:
"""Get a search-capable provider by name (compat API)."""
config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
provider = get_provider(name, config)
if provider is None:
return None
@@ -157,26 +303,20 @@ def get_search_provider(name: str,
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
"""List all search providers and their availability."""
availability: Dict[str,
bool] = {}
for name, provider_class in _PROVIDERS.items():
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = provider_class(config)
availability[name] = bool(
provider.validate() and _supports_search(provider)
provider = info.provider_class(config)
availability[info.canonical_name] = bool(
provider.validate() and info.supports_search
)
except Exception:
availability[name] = False
availability[info.canonical_name] = False
return availability
def get_file_provider(name: str,
config: Optional[Dict[str,
Any]] = None) -> Optional[FileProvider]:
"""Get an upload-capable provider by name (compat API)."""
config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
provider = get_provider(name, config)
if provider is None:
return None
@@ -187,28 +327,19 @@ def get_file_provider(name: str,
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
"""List all file providers and their availability."""
availability: Dict[str,
bool] = {}
for name, provider_class in _PROVIDERS.items():
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = provider_class(config)
availability[name] = bool(
provider.validate() and _supports_upload(provider)
provider = info.provider_class(config)
availability[info.canonical_name] = bool(
provider.validate() and info.supports_upload
)
except Exception:
availability[name] = False
availability[info.canonical_name] = False
return availability
def match_provider_name_for_url(url: str) -> Optional[str]:
"""Return a registered provider name that claims the URL's domain.
Providers can declare domains via class attribute `URL` (preferred) or `URL_DOMAINS`.
This matcher is intentionally cheap (no provider instantiation, no network).
"""
raw_url = str(url or "").strip()
raw_url_lower = raw_url.lower()
try:
@@ -219,11 +350,6 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
host = ""
path = ""
# Prefer Internet Archive for archive.org links unless the URL clearly refers
# to a borrow/loan flow (handled by OpenLibrary provider).
#
# This keeps direct downloads and item pages routed to `internetarchive`, while
# preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
def _norm_host(h: str) -> str:
h_norm = str(h or "").strip().lower()
if h_norm.startswith("www."):
@@ -234,47 +360,45 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
if host_norm:
if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
return "openlibrary" if "openlibrary" in _PROVIDERS else None
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
low_path = str(path or "").lower()
is_borrowish = (
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
or low_path.startswith("/services/loans/") or "/services/loans/" in low_path
low_path.startswith("/borrow/")
or low_path.startswith("/stream/")
or low_path.startswith("/services/loans/")
or "/services/loans/" in low_path
)
if is_borrowish:
return "openlibrary" if "openlibrary" in _PROVIDERS else None
return "internetarchive" if "internetarchive" in _PROVIDERS else None
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
return "internetarchive" if REGISTRY.has_name("internetarchive") else None
for name, provider_class in _PROVIDERS.items():
domains = _provider_url_patterns(provider_class)
for info in REGISTRY.iter_providers():
domains = _provider_url_patterns(info.provider_class)
if not domains:
continue
for d in domains:
dom_raw = str(d or "").strip()
for domain in domains:
dom_raw = str(domain or "").strip()
dom = dom_raw.lower()
if not dom:
continue
# Scheme-like patterns (magnet:, http://example) still use prefix match.
if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
if raw_url_lower.startswith(dom):
return name
return info.canonical_name
continue
dom_norm = _norm_host(dom)
if not dom_norm or not host_norm:
continue
if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
return name
return info.canonical_name
return None
def get_provider_for_url(url: str,
config: Optional[Dict[str,
Any]] = None) -> Optional[Provider]:
"""Instantiate and return the matching provider for a URL, if any."""
config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
name = match_provider_name_for_url(url)
if not name:
return None
@@ -282,10 +406,12 @@ def get_provider_for_url(url: str,
__all__ = [
"SearchResult",
"ProviderInfo",
"Provider",
"SearchProvider",
"FileProvider",
"SearchResult",
"register_provider",
"get_provider",
"list_providers",
"get_search_provider",
@@ -294,7 +420,7 @@ __all__ = [
"list_file_providers",
"match_provider_name_for_url",
"get_provider_for_url",
"download_soulseek_file",
"get_provider_class",
"selection_auto_stage_for_table",
"download_soulseek_file",
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,234 +0,0 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
import os
import socket
import subprocess
import sys
from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == "nt":
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, "r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc: # Pipe busy
# Windows named pipes can intermittently raise EINVAL while the pipe exists
# but is not ready/accepting connections yet.
if exc.errno not in (errno.ENOENT,
errno.EPIPE,
errno.EBUSY,
errno.EINVAL):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
else:
sock = socket.socket(socket.AF_UNIX)
while True:
try:
sock.connect(path)
return sock.makefile("r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
time.sleep(0.05)
except OSError as exc:
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
raise
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
"""Create a helper function for sending script messages via IPC."""
if ipc is None:
def _noop(_event: str, _payload: dict) -> None:
return None
return _noop
lock = threading.Lock()
def _send(event: str, payload: dict) -> None:
message = json.dumps(
{
"command": ["script-message",
event,
json.dumps(payload)]
},
ensure_ascii=False
)
encoded = message.encode("utf-8") + b"\n"
with lock:
try:
ipc.write(encoded)
ipc.flush()
except OSError:
pass
return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
    """Yield each line from *stream* with trailing CR/LF stripped."""
    for line in stream:
        yield line.rstrip("\r\n")
def _run_task(args, parser) -> int:
    """Run a child command, streaming its output to mpv over IPC.

    Reads from *args*: ``command`` (argv, optionally after a "--" separator),
    ``env`` (KEY=VALUE overrides), ``ipc`` / ``ipc_timeout`` (mpv IPC path),
    ``task_id`` (correlates events), and ``cwd``. Emits "downlow-task-event"
    script messages for start / stdout / stderr / exit / error, mirrors the
    aggregated output to this process's stdout/stderr, and returns the
    child's exit code (1 on setup failure).
    """
    if not args.command:
        parser.error(
            'run-task requires a command to execute (use "--" before the command).'
        )
    # Start from the current environment and layer KEY=VALUE overrides on top.
    env = os.environ.copy()
    for entry in args.env:
        key, sep, value = entry.partition("=")
        if not sep:
            parser.error(f"Invalid environment variable definition: {entry!r}")
        env[key] = value
    command = list(args.command)
    # Drop the conventional "--" argv separator if present.
    if command and command[0] == "--":
        command.pop(0)
    # notifier is a no-op when the IPC connection cannot be established.
    notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
    if not command:
        notifier(
            "downlow-task-event",
            {
                "id": args.task_id,
                "event": "error",
                "message": "No command provided after separator",
            },
        )
        log("[downlow.py] No command provided for run-task", file=sys.stderr)
        return 1
    # Replace a bare "python"/"py" executable name with this interpreter so
    # the child runs under the same Python as the host process.
    if command and isinstance(command[0], str) and sys.executable:
        first = command[0].lower()
        if first in {"python",
                     "python3",
                     "py",
                     "python.exe",
                     "python3.exe",
                     "py.exe"}:
            command[0] = sys.executable
    if os.environ.get("DOWNLOW_DEBUG"):
        log(f"Launching command: {command}", file=sys.stderr)
    notifier(
        "downlow-task-event",
        {
            "id": args.task_id,
            "event": "start",
            "command": command,
            "cwd": args.cwd or os.getcwd(),
        },
    )
    popen_kwargs = {}
    if os.name == "nt":
        # Avoid flashing a console window when spawning console-subsystem executables.
        flags = 0
        try:
            flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000))
        except Exception:
            # Fall back to the literal flag value if the constant is missing.
            flags |= 0x08000000
        popen_kwargs["creationflags"] = flags
        try:
            si = subprocess.STARTUPINFO()
            si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            si.wShowWindow = subprocess.SW_HIDE
            popen_kwargs["startupinfo"] = si
        except Exception:
            pass
    try:
        # Line-buffered text pipes so the pump threads see whole lines promptly.
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=args.cwd or None,
            env=env,
            text=True,
            bufsize=1,
            universal_newlines=True,
            **popen_kwargs,
        )
    except FileNotFoundError as exc:
        notifier(
            "downlow-task-event",
            {
                "id": args.task_id,
                "event": "error",
                "message": f"Executable not found: {exc.filename}",
            },
        )
        log(f"{exc}", file=sys.stderr)
        return 1
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []

    def pump(stream: IO[str], label: str, sink: list[str]) -> None:
        # Forward each line both to the aggregate buffer and to mpv.
        for line in iter_stream(stream):
            sink.append(line)
            notifier(
                "downlow-task-event",
                {
                    "id": args.task_id,
                    "event": label,
                    "line": line,
                },
            )

    # Pump stdout and stderr concurrently so neither pipe can fill and
    # deadlock the child.
    threads = []
    if process.stdout:
        t_out = threading.Thread(
            target=pump,
            args=(process.stdout,
                  "stdout",
                  stdout_lines),
            daemon=True
        )
        t_out.start()
        threads.append(t_out)
    if process.stderr:
        t_err = threading.Thread(
            target=pump,
            args=(process.stderr,
                  "stderr",
                  stderr_lines),
            daemon=True
        )
        t_err.start()
        threads.append(t_err)
    return_code = process.wait()
    # Short join only: pump threads are daemons and may still be draining a
    # final partial read; the exit event below is sent regardless.
    for t in threads:
        t.join(timeout=0.1)
    notifier(
        "downlow-task-event",
        {
            "id": args.task_id,
            "event": "exit",
            "returncode": return_code,
            "success": return_code == 0,
        },
    )
    # Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
    if stdout_lines:
        log("\n".join(stdout_lines))
    if stderr_lines:
        log("\n".join(stderr_lines), file=sys.stderr)
    return return_code

View File

@@ -142,6 +142,8 @@ class Store:
BaseStore] = {}
self._backend_errors: Dict[str,
str] = {}
self._backend_types: Dict[str,
str] = {}
self._load_backends()
def _maybe_register_temp_alias(
@@ -179,6 +181,7 @@ class Store:
# Keep original name working, but add an alias.
if backend_name != "temp":
self._backends["temp"] = backend
self._backend_types["temp"] = store_type
except Exception:
return
@@ -187,6 +190,7 @@ class Store:
if not isinstance(store_cfg, dict):
store_cfg = {}
self._backend_types = {}
classes_by_type = _discover_store_classes()
for raw_store_type, instances in store_cfg.items():
if not isinstance(instances, dict):
@@ -232,6 +236,7 @@ class Store:
backend_name = str(kwargs.get("NAME") or instance_name)
self._backends[backend_name] = backend
self._backend_types[backend_name] = store_type
# If this is the configured temp directory, also alias it as 'temp'.
self._maybe_register_temp_alias(
@@ -249,6 +254,47 @@ class Store:
f"[Store] Failed to register {store_cls.__name__} instance '{instance_name}': {exc}"
)
def _resolve_backend_name(self,
                          backend_name: str) -> tuple[Optional[str], Optional[str]]:
    """Resolve a user-supplied alias to a registered backend name.

    Returns ``(name, None)`` on a unique match, ``(None, error)`` when the
    alias is ambiguous, and ``(None, None)`` when nothing matches. Matching
    passes, in order: exact key, case-insensitive name, exact store type,
    store-type prefix.
    """
    requested = str(backend_name or "")
    # Pass 0: exact backend key wins immediately.
    if requested in self._backends:
        return requested, None
    requested_norm = _normalize_store_type(requested)
    # Pass 1: case-insensitive match on backend names.
    ci_matches = [
        name for name in self._backends
        if _normalize_store_type(name) == requested_norm
    ]
    if ci_matches:
        if len(ci_matches) == 1:
            return ci_matches[0], None
        return None, f"Ambiguous store alias '{backend_name}' matches {ci_matches}"
    # Pass 2: exact match on the registered store type.
    type_matches = [
        name for name, store_type in self._backend_types.items()
        if store_type == requested_norm
    ]
    if type_matches:
        if len(type_matches) == 1:
            return type_matches[0], None
        return None, (
            f"Ambiguous store alias '{backend_name}' matches type '{requested_norm}': {type_matches}"
        )
    # Pass 3: prefix match on the store type.
    prefix_matches = [
        name for name, store_type in self._backend_types.items()
        if store_type.startswith(requested_norm)
    ]
    if prefix_matches:
        if len(prefix_matches) == 1:
            return prefix_matches[0], None
        return None, (
            f"Ambiguous store alias '{backend_name}' matches type prefix '{requested_norm}': {prefix_matches}"
        )
    return None, None
def get_backend_error(self, backend_name: str) -> Optional[str]:
    """Return the recorded load error for *backend_name*, or None."""
    key = str(backend_name)
    return self._backend_errors.get(key)
@@ -277,14 +323,20 @@ class Store:
return sorted(chosen.values())
def __getitem__(self, backend_name: str) -> BaseStore:
if backend_name not in self._backends:
resolved, err = self._resolve_backend_name(backend_name)
if resolved:
return self._backends[resolved]
if err:
raise KeyError(
f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}"
f"Unknown store backend: {backend_name}. {err}"
)
return self._backends[backend_name]
raise KeyError(
f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}"
)
def is_available(self, backend_name: str) -> bool:
return backend_name in self._backends
resolved, _err = self._resolve_backend_name(backend_name)
return resolved is not None
def try_add_url_for_pipe_object(self, pipe_obj: Any, url: str) -> bool:
"""Best-effort helper: if `pipe_obj` contains `store` + `hash`, add `url` to that store backend.

View File

@@ -244,7 +244,7 @@ class SharedArgs:
description="Destination location",
)
DELETE_FLAG = CmdletArg(
DELETE = CmdletArg(
"delete",
type="flag",
description="Delete the file and its .tag after successful operation.",
@@ -2081,6 +2081,12 @@ def extract_url_from_result(result: Any) -> list[str]:
_extend(result.metadata.get("url"))
_extend(result.metadata.get("url"))
_extend(result.metadata.get("url"))
if isinstance(getattr(result, "full_metadata", None), dict):
fm = getattr(result, "full_metadata", None)
if isinstance(fm, dict):
_extend(fm.get("url"))
_extend(fm.get("url"))
_extend(fm.get("url"))
elif hasattr(result, "url") or hasattr(result, "url"):
# Handle objects with url/url attribute
_extend(getattr(result, "url", None))
@@ -2090,6 +2096,11 @@ def extract_url_from_result(result: Any) -> list[str]:
_extend(result.get("url"))
_extend(result.get("url"))
_extend(result.get("url"))
fm = result.get("full_metadata")
if isinstance(fm, dict):
_extend(fm.get("url"))
_extend(fm.get("url"))
_extend(fm.get("url"))
extra = result.get("extra")
if isinstance(extra, dict):
_extend(extra.get("url"))
@@ -2531,6 +2542,30 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
metadata["_tidal_track_details_fetched"] = True
except Exception:
pass
if not metadata.get("url"):
try:
resp_info = httpx.get(
"https://tidal-api.binimum.org/info/",
params={"id": str(track_int)},
timeout=10.0,
)
resp_info.raise_for_status()
info_payload = resp_info.json()
info_data = info_payload.get("data") if isinstance(info_payload, dict) else None
if isinstance(info_data, dict) and info_data:
try:
for k, v in info_data.items():
if k not in metadata:
metadata[k] = v
except Exception:
pass
try:
if info_data.get("url"):
metadata["url"] = info_data.get("url")
except Exception:
pass
except Exception:
pass
except Exception:
pass

View File

@@ -345,6 +345,14 @@ class Add_File(Cmdlet):
else:
items_to_process = [result]
total_items = len(items_to_process) if isinstance(items_to_process, list) else 0
processed_items = 0
try:
if total_items:
progress.set_percent(0)
except Exception:
pass
# Minimal step-based progress for single-item runs.
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
use_steps = False
@@ -496,9 +504,25 @@ class Add_File(Cmdlet):
and len(items_to_process) > 1
)
for item in items_to_process:
for idx, item in enumerate(items_to_process, 1):
pipe_obj = coerce_to_pipe_object(item, path_arg)
try:
label = pipe_obj.title or pipe_obj.name
if not label and pipe_obj.path:
try:
label = Path(str(pipe_obj.path)).name
except Exception:
label = pipe_obj.path
if not label:
label = "file"
if total_items:
pending_pct = int(round(((idx - 1) / max(1, total_items)) * 100))
progress.set_percent(pending_pct)
progress.set_status(f"adding {idx}/{total_items}: {label}")
except Exception:
pass
temp_dir_to_cleanup: Optional[Path] = None
delete_after_item = delete_after
try:
@@ -597,6 +621,14 @@ class Add_File(Cmdlet):
shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
except Exception:
pass
processed_items += 1
try:
pct = int(round((processed_items / max(1, total_items)) * 100))
progress.set_percent(pct)
if processed_items >= total_items:
progress.clear_status()
except Exception:
pass
# Apply deferred url associations (bulk) before showing the final store table.
if pending_url_associations:

289
cmdlet/convert_file.py Normal file
View File

@@ -0,0 +1,289 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
from pathlib import Path
import sys
import shutil
import subprocess
from SYS.logger import log, debug
from SYS.utils import sha256_file
from . import _shared as sh
from SYS import pipeline as ctx
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_title_from_result = sh.extract_title_from_result
VIDEO_EXTS = {
"mp4",
"mkv",
"webm",
"mov",
"avi",
"flv",
"mpeg",
"mpg",
"m4v",
}
AUDIO_EXTS = {
"mp3",
"m4a",
"m4b",
"aac",
"flac",
"wav",
"ogg",
"opus",
"mka",
}
IMAGE_EXTS = {
"png",
"jpg",
"jpeg",
"webp",
"bmp",
"tif",
"tiff",
"gif",
}
DOC_EXTS = {
"pdf",
"mobi",
"epub",
"azw3",
"txt",
"rtf",
"html",
"htm",
"md",
"doc",
"docx",
}
def _detect_kind(ext: str) -> str:
e = ext.lower().lstrip(".")
if e in VIDEO_EXTS:
return "video"
if e in AUDIO_EXTS:
return "audio"
if e in IMAGE_EXTS:
return "image"
if e in DOC_EXTS:
return "doc"
return "unknown"
def _allowed(source_kind: str, target_kind: str) -> bool:
if source_kind == target_kind:
return True
if source_kind == "video" and target_kind == "audio":
return True
return False
def _ffmpeg_convert(
    input_path: Path,
    output_path: Path,
    target_kind: str,
    copy_metadata: bool,
) -> bool:
    """Convert a media file with ffmpeg.

    Builds and runs an ffmpeg command line; returns True on success and
    logs ffmpeg's stderr on failure.  Requires ffmpeg on PATH.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    cmd = [ffmpeg_path, "-y", "-i", str(input_path)]
    if target_kind == "audio":
        # Drop any video stream when extracting audio.
        cmd.append("-vn")
    if copy_metadata:
        # Carry container metadata over from the source file.
        cmd += ["-map_metadata", "0"]
    cmd.append(str(output_path))

    debug(f"[convert-file] Running ffmpeg: {' '.join(cmd)}")
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode == 0:
        return True
    log(f"ffmpeg error: {proc.stderr}", file=sys.stderr)
    return False
def _doc_convert(input_path: Path, output_path: Path) -> bool:
    """Convert a document via pypandoc; returns True on success.

    The target format is inferred from the output file's extension
    (defaulting to pdf).  Requires the optional pypandoc dependency.
    """
    try:
        import pypandoc  # type: ignore
    except Exception:
        log("pypandoc is required for document conversion; install pypandoc-binary", file=sys.stderr)
        return False

    target_fmt = output_path.suffix.lstrip(".").lower() or "pdf"
    try:
        pypandoc.convert_file(
            str(input_path),
            to=target_fmt,
            outputfile=str(output_path),
        )
    except OSError as exc:
        # pypandoc raises OSError when the pandoc binary itself is absent/broken.
        log(f"pandoc is missing or failed to run: {exc}", file=sys.stderr)
        return False
    except Exception as exc:
        log(f"pypandoc conversion failed: {exc}", file=sys.stderr)
        return False

    if output_path.exists():
        return True
    log("pypandoc conversion did not produce an output file", file=sys.stderr)
    return False
# Cmdlet registration metadata for `convert-file`: name, usage/help text, and
# the argument schema consumed by parse_cmdlet_args in _run.
CMDLET = Cmdlet(
    name="convert-file",
    summary="Convert files between media/container formats (video, audio, image, documents).",
    usage="convert-file -to <format> [-path <file|dir>] [-delete] [-query format:<fmt>]",
    arg=[
        # Target format is required; exposed both as -to and as a query key.
        QueryArg("to", key="format", query_only=False, required=True,
                 description="Target format/extension (e.g., mp4, mp3, wav, jpg, pdf)."),
        SharedArgs.PATH,
        SharedArgs.QUERY,
        SharedArgs.DELETE,
    ],
    detail=[
        "Allows video↔video, audio↔audio, image↔image, doc↔doc, and video→audio conversions.",
        "Disallows incompatible conversions (e.g., video→pdf).",
        "Uses ffmpeg for media and pypandoc-binary (bundled pandoc) for document formats (mobi/epub→pdf/txt/etc).",
    ],
)
def _resolve_output_path(input_path: Path, outdir: Optional[Path], target_ext: str) -> Path:
base = input_path.stem
directory = outdir if outdir is not None else input_path.parent
directory.mkdir(parents=True, exist_ok=True)
candidate = directory / f"{base}.{target_ext}"
if candidate.exists():
for i in range(1, 1000):
alt = directory / f"{base}_{i}.{target_ext}"
if not alt.exists():
candidate = alt
break
return candidate
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Entry point for the `convert-file` cmdlet.

    Converts each piped/selected input file to the requested target format,
    emits a pipeline record for every successful conversion, and optionally
    deletes the source file.  Returns 0 if at least one conversion
    succeeded, 1 otherwise.
    """
    parsed = parse_cmdlet_args(args, CMDLET)
    # The target format may arrive under either key depending on how the
    # QueryArg("to", key="format") argument was supplied.
    target_fmt_raw = parsed.get("to") or parsed.get("format")
    if not target_fmt_raw:
        log("-to <format> is required", file=sys.stderr)
        return 1
    target_fmt = str(target_fmt_raw).lower().lstrip(".")
    target_kind = _detect_kind(target_fmt)
    if target_kind == "unknown":
        log(f"Unsupported target format: {target_fmt}", file=sys.stderr)
        return 1
    delete_src = bool(parsed.get("delete", False))
    inputs = normalize_result_input(result)
    path_arg = parsed.get("path")
    # -path is either an output directory (when it is an existing dir) or an
    # additional input file.
    outdir_override: Optional[Path] = None
    if path_arg:
        try:
            p = Path(str(path_arg)).expanduser()
            if p.exists() and p.is_dir():
                outdir_override = p
            else:
                inputs.append({"path": p})
        except Exception:
            inputs.append({"path": path_arg})
    if not inputs:
        log("No input provided to convert-file", file=sys.stderr)
        return 1
    success = 0
    for item in inputs:
        # Accept dicts, objects with a .path attribute, or bare path-like values.
        input_path: Optional[Path] = None
        if isinstance(item, dict):
            p = item.get("path") or item.get("target")
        elif hasattr(item, "path"):
            p = getattr(item, "path")
        else:
            p = item
        try:
            input_path = Path(str(p)) if p else None
        except Exception:
            input_path = None
        if not input_path or not input_path.exists() or not input_path.is_file():
            log("convert-file: input path missing or not found", file=sys.stderr)
            continue
        source_ext = input_path.suffix.lower().lstrip(".")
        source_kind = _detect_kind(source_ext)
        # Enforce the same-kind / video→audio conversion policy.
        if not _allowed(source_kind, target_kind):
            log(
                f"Conversion from {source_kind or 'unknown'} to {target_kind} is not allowed",
                file=sys.stderr,
            )
            continue
        output_path = _resolve_output_path(input_path, outdir_override, target_fmt)
        converted = False
        if target_kind in {"video", "audio", "image"}:
            converted = _ffmpeg_convert(input_path, output_path, target_kind, copy_metadata=True)
        elif target_kind == "doc":
            converted = _doc_convert(input_path, output_path)
        else:
            log(f"No converter for target kind {target_kind}", file=sys.stderr)
        if not converted:
            continue
        # Hashing failures are non-fatal; the record is emitted without a hash.
        try:
            out_hash = sha256_file(output_path)
        except Exception:
            out_hash = None
        title = extract_title_from_result(item) or output_path.stem
        ctx.emit({
            "path": str(output_path),
            "title": title,
            "hash": out_hash,
            "media_kind": target_kind,
            "source_path": str(input_path),
        })
        if delete_src:
            try:
                input_path.unlink()
                log(f"Deleted source file: {input_path}", file=sys.stderr)
            except Exception as exc:
                log(f"Failed to delete source {input_path}: {exc}", file=sys.stderr)
        success += 1
    return 0 if success else 1
# Wire the implementation into the cmdlet descriptor and register it with the runtime.
CMDLET.exec = _run
CMDLET.register()

View File

@@ -22,8 +22,8 @@ from Provider import internetarchive as ia_provider
from Provider import alldebrid as ad_provider
from Provider import openlibrary as ol_provider
from SYS.download import DownloadError, _download_direct_file
from SYS.models import DownloadOptions, DownloadMediaResult
from API.HTTP import _download_direct_file
from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.result_table import ResultTable
@@ -890,7 +890,6 @@ class Download_File(Cmdlet):
return expanded_items
def _process_provider_items(
self,
*,
piped_items: Sequence[Any],
final_output_dir: Path,
@@ -900,8 +899,9 @@ class Download_File(Cmdlet):
registry: Dict[str,
Any],
progress: PipelineProgress,
) -> int:
) -> tuple[int, int]:
downloaded_count = 0
queued_magnet_submissions = 0
get_search_provider = registry.get("get_search_provider")
SearchResult = registry.get("SearchResult")
@@ -911,8 +911,17 @@ class Download_File(Cmdlet):
config=config
)
total_items = len(expanded_items)
processed_items = 0
try:
if total_items:
progress.set_percent(0)
except Exception:
pass
for item in expanded_items:
try:
label = "item"
table = get_field(item, "table")
title = get_field(item, "title")
target = get_field(item, "path") or get_field(item, "url")
@@ -933,6 +942,25 @@ class Download_File(Cmdlet):
if isinstance(extra_md, dict):
full_metadata = extra_md
try:
label = title or target
label = str(label or "item").strip()
if total_items:
pct = int(round((processed_items / max(1, total_items)) * 100))
progress.set_percent(pct)
progress.set_status(
f"downloading {processed_items + 1}/{total_items}: {label}"
)
except Exception:
pass
transfer_label = label
if str(table or "").lower() == "hifi":
try:
progress.begin_transfer(label=transfer_label, total=None)
except Exception:
pass
# If this looks like a provider item and providers are available, prefer provider.download()
downloaded_path: Optional[Path] = None
attempted_provider_download = False
@@ -1065,6 +1093,45 @@ class Download_File(Cmdlet):
continue
# Magnet targets (e.g., torrent provider results) -> submit/download via AllDebrid
if downloaded_path is None and isinstance(target, str) and is_magnet_link(str(target)):
magnet_spec = ad_provider.resolve_magnet_spec(str(target))
if magnet_spec:
def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None:
title_hint = metadata.get("name") or relpath or title
self._emit_local_file(
downloaded_path=path,
source=file_url or target,
title_hint=title_hint,
tags_hint=None,
media_kind_hint="file",
full_metadata=metadata,
progress=progress,
config=config,
provider_hint="alldebrid",
)
downloaded, magnet_id = ad_provider.download_magnet(
magnet_spec,
str(target),
final_output_dir,
config,
progress,
quiet_mode,
self._path_from_download_result,
_on_emit,
)
if downloaded > 0:
downloaded_count += downloaded
continue
# If queued but not yet ready, skip the generic unsupported-target error.
if magnet_id is not None:
queued_magnet_submissions += 1
continue
# Fallback: if we have a direct HTTP URL, download it directly
if (downloaded_path is None and isinstance(target,
str)
@@ -1080,6 +1147,7 @@ class Download_File(Cmdlet):
file=sys.stderr,
)
continue
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
@@ -1150,8 +1218,22 @@ class Download_File(Cmdlet):
log(f"Download failed: {e}", file=sys.stderr)
except Exception as e:
log(f"Error downloading item: {e}", file=sys.stderr)
finally:
if str(table or "").lower() == "hifi":
try:
progress.finish_transfer(label=transfer_label)
except Exception:
pass
processed_items += 1
try:
pct = int(round((processed_items / max(1, total_items)) * 100))
progress.set_percent(pct)
if processed_items >= total_items:
progress.clear_status()
except Exception:
pass
return downloaded_count
return downloaded_count, queued_magnet_submissions
# === Streaming helpers (yt-dlp) ===
@@ -2687,6 +2769,15 @@ class Download_File(Cmdlet):
debug(f"Output directory: {final_output_dir}")
try:
PipelineProgress(pipeline_context).ensure_local_ui(
label="download-file",
total_items=len(supported_url),
items_preview=supported_url,
)
except Exception:
pass
clip_spec = parsed.get("clip")
query_spec = parsed.get("query")
@@ -3572,7 +3663,7 @@ class Download_File(Cmdlet):
if early_exit is not None:
return int(early_exit)
downloaded_count += self._process_provider_items(
provider_downloaded, magnet_submissions = self._process_provider_items(
piped_items=piped_items,
final_output_dir=final_output_dir,
config=config,
@@ -3580,9 +3671,13 @@ class Download_File(Cmdlet):
registry=registry,
progress=progress,
)
downloaded_count += provider_downloaded
if downloaded_count > 0 or streaming_downloaded > 0:
debug(f"✓ Successfully processed {downloaded_count} file(s)")
if downloaded_count > 0 or streaming_downloaded > 0 or magnet_submissions > 0:
msg = f"✓ Successfully processed {downloaded_count} file(s)"
if magnet_submissions:
msg += f" and queued {magnet_submissions} magnet(s)"
debug(msg)
return 0
if streaming_exit_code is not None:

View File

@@ -255,7 +255,7 @@ def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
# Prefer a true support check when the Python module is available.
try:
from SYS.download import is_url_supported_by_ytdlp
from tool.ytdlp import is_url_supported_by_ytdlp
for text in candidates:
try:

View File

@@ -246,7 +246,7 @@ class search_file(Cmdlet):
else:
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_lower in {"youtube", "openlibrary", "loc"}
preserve_order = provider_lower in {"youtube", "openlibrary", "loc", "torrent"}
table_type = provider_name
table_meta: Dict[str, Any] = {"provider": provider_name}
if provider_lower == "hifi":

View File

@@ -444,7 +444,7 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
url = _maybe_unlock_alldebrid_url(url, config)
try:
from SYS.download import _download_direct_file
from API.HTTP import _download_direct_file
base_tmp = None
if isinstance(config, dict):

165
docs/provider_guide.md Normal file
View File

@@ -0,0 +1,165 @@
# Provider Development Guide
## 🎯 Purpose
This guide describes how to write, test, and register a provider so the application can discover and use it as a pluggable component.
> Keep provider code small, focused, and well-tested. Use existing providers as examples.
---
## 🔧 Anatomy of a Provider
A provider is a Python class that extends `ProviderCore.base.Provider` and implements a few key methods and attributes.
Minimum expectations:
- `class MyProvider(Provider):` — subclass the base provider
- `URL` / `URL_DOMAINS` or `url_patterns()` — to let the registry route URLs
- `validate(self) -> bool` — return True when provider is configured and usable
- `search(self, query, limit=50, filters=None, **kwargs)` — return a list of `SearchResult`
Optional but common:
- `download(self, result: SearchResult, output_dir: Path) -> Optional[Path]` — download a provider result
- `selector(self, selected_items, *, ctx, stage_is_last=True, **kwargs) -> bool` — handle `@N` selections
- `download_url(self, url, output_dir, progress_cb=None)` — direct URL-handling helper
---
## 🧩 SearchResult
Use `ProviderCore.base.SearchResult` to describe results returned by `search()`.
Important fields:
- `table` (str) — provider table name
- `title` (str) — short human title
- `path` (str) — canonical URL / link the provider/dl may use
- `media_kind` (str) — `file`, `folder`, `book`, etc.
- `columns` (list[tuple[str,str]]) — extra key/value pairs to display
- `full_metadata` (dict) — provider-specific metadata for downstream stages
- `annotations` / `tag` — simple metadata for filtering
Return a list of `SearchResult(...)` objects or simple dicts convertible with `.to_dict()`.
---
## ✅ Implementing search()
- Parse and sanitize `query` and `filters`.
- Return no more than `limit` results.
- Use `columns` to provide table columns (TITLE, Seeds, Size, etc.).
- Keep `search()` fast and predictable (apply reasonable timeouts).
Example:
```python
from ProviderCore.base import Provider, SearchResult
class HelloProvider(Provider):
def search(self, query, limit=50, filters=None, **kwargs):
q = (query or "").strip()
if not q:
return []
results = []
# Build up results
results.append(
SearchResult(
table="hello",
title=f"Hit for {q}",
path=f"https://example/{q}",
columns=[("Info", "example")],
full_metadata={"source": "hello"},
)
)
return results[:max(0, int(limit))]
```
---
## ⬇️ Implementing download() and download_url()
- Prefer provider `download(self, result, output_dir)` for piped provider items.
- For provider-provided URLs, implement `download_url` to allow `download-file` to route downloads through providers.
- Use the repo `_download_direct_file` helper for HTTP downloads when possible.
Example download():
```python
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
# Validate config
url = getattr(result, "path", None)
if not url or not url.startswith("http"):
return None
# use existing helpers to fetch the file
return _download_direct_file(url, output_dir)
```
---
## 🧭 URL routing
Providers can declare:
- `URL = ("magnet:",)` or similar prefix list
- `URL_DOMAINS = ("example.com",)` to match hosts
- Or override `@classmethod def url_patterns(cls):` to combine static and dynamic patterns
The registry uses these to match `download-file <url>` or to pick which provider should handle the URL.
---
## 🛠 Selector (handling `@N` picks)
- Implement `selector(self, selected_items, *, ctx, stage_is_last=True)` to present a sub-table or to enqueue downloads.
- Use `ctx.set_last_result_table()` and `ctx.set_current_stage_table()` to display follow-ups.
- Return `True` when you handled the selection and the pipeline should pause or proceed accordingly.
---
## 🧪 Testing providers
- Keep tests small and local. Create `tests/test_provider_<name>.py`.
- Test `search()` with mock HTTP responses (use `requests-mock` or similar).
- Test `download()` using a temp directory and a small file server or by mocking `_download_direct_file`.
- Test `selector()` by constructing a fake result and `ctx` object.
Example PowerShell commands to run tests (repo root):
```powershell
# Run a single test file
pytest tests/test_provider_hello.py -q
# Run all tests
pytest -q
```
---
## 📦 Registration & packaging
- Add your provider module under `Provider/` and ensure it is imported by module package initialization. Common approach:
- Place file `Provider/myprovider.py`
- Ensure `Provider/__init__.py` imports the module (or the registry auto-discovers by package import)
- If the project has a central provider registry, add lookup helpers there (e.g., `ProviderCore/registry.py`). Usually providers register themselves at import time.
---
## 💡 Best practices & tips
- Use `debug()` / `log()` appropriately; avoid noisy stderr output in normal runs.
- Prefer returning `SearchResult` objects to provide consistent UX.
- Keep `search()` tolerant (timeouts, malformed responses) and avoid raising for expected network problems.
- Use `full_metadata` to pass non-display data to `download()` and `selector()`.
- Respect the `limit` parameter in `search()`.
---
## 🧾 Example provider checklist
- [ ] Implement `search()` and return `SearchResult` items
- [ ] Implement `validate()` to check essential config (API keys, credentials)
- [ ] Provide `URL` / `URL_DOMAINS` or `url_patterns()` for routing
- [ ] Add `download()` or `download_url()` for piped/passed URL downloads
- [ ] Add tests under `tests/`
- [ ] Add module to `Provider/` package and ensure import/registration
---
## 🔗 Further reading
- See existing providers in `Provider/` for patterns and edge cases.
- Check `API/` helpers for HTTP and debrid clients.
---
A minimal example provider (`Provider/hello_provider.py`) and matching unit tests (`tests/test_provider_hello.py`) are included in this repository; use them as a starting point for new providers.

View File

@@ -19,6 +19,7 @@ pypdf>=3.0.0
mutagen>=1.46.0
cbor2>=4.0
zstandard>=0.23.0
pypandoc-binary
# Image and media support
Pillow>=10.0.0
@@ -45,3 +46,4 @@ playwright>=1.40.0
# Development and utilities
python-dateutil>=2.8.0

10
tmp_trim_registry.py Normal file
View File

@@ -0,0 +1,10 @@
"""One-off maintenance script: truncate ProviderCore/registry.py at the
second occurrence of its module docstring marker, removing accidentally
duplicated file content."""
from pathlib import Path

path = Path("ProviderCore/registry.py")
# Read explicitly as UTF-8: the original read_text() used the platform locale
# encoding while write_text() used UTF-8, which corrupts non-ASCII bytes on
# systems where the locale encoding is not UTF-8 (e.g. Windows cp1252).
text = path.read_text(encoding="utf-8")
marker = '"""Provider registry.'
first = text.find(marker)
second = text.find(marker, first + 1)
if second != -1:
    # Keep everything before the duplicate, with a single trailing newline.
    trimmed = text[:second].rstrip() + "\n"
    path.write_text(trimmed, encoding="utf-8")

3
tmp_write_registry.py Normal file
View File

@@ -0,0 +1,3 @@
# Scratch script: apparently intended to rewrite ProviderCore/registry.py
# wholesale.  NOTE(review): new_content is an empty string and nothing is ever
# written; this looks like an abandoned stub — confirm before relying on it.
from pathlib import Path
new_content = """"""

View File

@@ -29,6 +29,9 @@ from SYS.models import (
from SYS.pipeline_progress import PipelineProgress
from SYS.utils import ensure_directory, sha256_file
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
try:
import yt_dlp # type: ignore
from yt_dlp.extractor import gen_extractors # type: ignore
@@ -565,9 +568,35 @@ class YtDlpTool:
# Progress + utility helpers for yt-dlp driven downloads (previously in cmdlet/download_media).
_YTDLP_PROGRESS_BAR = ProgressBar()
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc")
def _progress_label(status: Dict[str, Any]) -> str:
info_dict = status.get("info_dict") if isinstance(status.get("info_dict"), dict) else {}
candidates = [
status.get("filename"),
info_dict.get("_filename"),
info_dict.get("filepath"),
info_dict.get("title"),
info_dict.get("id"),
]
for cand in candidates:
if not cand:
continue
try:
name = Path(str(cand)).name
except Exception:
name = str(cand)
label = str(name or "").strip()
if label:
return label
return "download"
def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
ui = None
try:
@@ -937,19 +966,53 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
def _progress_callback(status: Dict[str, Any]) -> None:
label = _progress_label(status)
event = status.get("status")
if event == "downloading":
downloaded = status.get("downloaded_bytes")
total = status.get("total_bytes") or status.get("total_bytes_estimate")
downloaded = status.get("downloaded_bytes")
total = status.get("total_bytes") or status.get("total_bytes_estimate")
_YTDLP_PROGRESS_BAR.update(
downloaded=int(downloaded) if downloaded is not None else None,
total=int(total) if total is not None else None,
label="download",
file=sys.stderr,
)
pipeline = PipelineProgress(pipeline_context)
live_ui, _ = pipeline.ui_and_pipe_index()
use_live = live_ui is not None
def _total_bytes(value: Any) -> Optional[int]:
try:
if isinstance(value, (int, float)) and value > 0:
return int(value)
except Exception:
pass
return None
if event == "downloading":
if use_live:
try:
if not _YTDLP_TRANSFER_STATE.get(label, {}).get("started"):
pipeline.begin_transfer(label=label, total=_total_bytes(total))
_YTDLP_TRANSFER_STATE[label] = {"started": True}
pipeline.update_transfer(
label=label,
completed=int(downloaded) if downloaded is not None else None,
total=_total_bytes(total),
)
except Exception:
pass
else:
_YTDLP_PROGRESS_BAR.update(
downloaded=int(downloaded) if downloaded is not None else None,
total=int(total) if total is not None else None,
label=label,
file=sys.stderr,
)
elif event == "finished":
_YTDLP_PROGRESS_BAR.finish()
if use_live:
try:
if _YTDLP_TRANSFER_STATE.get(label, {}).get("started"):
pipeline.finish_transfer(label=label)
except Exception:
pass
_YTDLP_TRANSFER_STATE.pop(label, None)
else:
_YTDLP_PROGRESS_BAR.finish()
elif event in ("postprocessing", "processing"):
return