dfdkflj
This commit is contained in:
@@ -50,7 +50,6 @@ UrlPolicy = _utils.UrlPolicy
|
||||
DownloadOptions = _download.DownloadOptions
|
||||
DownloadError = _download.DownloadError
|
||||
DownloadMediaResult = _download.DownloadMediaResult
|
||||
download_media = _download.download_media
|
||||
is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp
|
||||
probe_url = _download.probe_url
|
||||
# Hydrus utilities
|
||||
|
||||
@@ -35,7 +35,7 @@ class AllDebridClient:
|
||||
"""Client for AllDebrid API."""
|
||||
|
||||
# Try both v4 and v3 APIs
|
||||
BASE_URLS = [
|
||||
BASE_url = [
|
||||
"https://api.alldebrid.com/v4",
|
||||
"https://api.alldebrid.com/v3",
|
||||
]
|
||||
@@ -49,7 +49,7 @@ class AllDebridClient:
|
||||
self.api_key = api_key.strip()
|
||||
if not self.api_key:
|
||||
raise AllDebridError("AllDebrid API key is empty")
|
||||
self.base_url = self.BASE_URLS[0] # Start with v4
|
||||
self.base_url = self.BASE_url[0] # Start with v4
|
||||
|
||||
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Make a request to AllDebrid API.
|
||||
@@ -738,7 +738,7 @@ def parse_magnet_or_hash(uri: str) -> Optional[str]:
|
||||
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Unlock a restricted link using AllDebrid.
|
||||
|
||||
Converts free hosters and restricted links to direct download URLs.
|
||||
Converts free hosters and restricted links to direct download url.
|
||||
|
||||
Usage:
|
||||
unlock-link <link>
|
||||
|
||||
@@ -378,7 +378,7 @@ def download(
|
||||
session: Authenticated requests.Session
|
||||
n_threads: Number of download threads
|
||||
directory: Directory to save images to
|
||||
links: List of image URLs
|
||||
links: List of image url
|
||||
scale: Image resolution (0=highest, 10=lowest)
|
||||
book_id: Archive.org book ID (for re-borrowing)
|
||||
|
||||
|
||||
195
helper/background_notifier.py
Normal file
195
helper/background_notifier.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""Lightweight console notifier for background WorkerManager tasks.
|
||||
|
||||
Registers a refresh callback on WorkerManager and prints concise updates when
|
||||
workers start, progress, or finish. Intended for CLI background workflows.
|
||||
|
||||
Filters to show only workers related to the current pipeline session to avoid
|
||||
cluttering the terminal with workers from previous sessions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Dict, Optional, Set
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
|
||||
class BackgroundNotifier:
    """Print worker status changes reported by a worker manager.

    The notifier registers ``_on_refresh`` as a refresh callback on the
    manager and turns each worker record (a dict) into a one-line message
    passed to ``output``. Three reporting modes exist:

    * verbose (default): every change of status/progress/step is printed;
    * ``only_terminal_updates``: only the final status of a worker is printed;
    * ``overlay_mode``: only final statuses are printed, and an empty string
      is sent to ``output`` when no tracked worker is still active.
    """

    # Statuses after which a worker is reported one last time and dropped
    # from the session filter.
    _TERMINAL_STATUSES = ("completed", "finished", "error")

    def __init__(
        self,
        manager: Any,
        output: Callable[[str], None] = log,
        session_worker_ids: Optional[Set[str]] = None,
        only_terminal_updates: bool = False,
        overlay_mode: bool = False,
    ) -> None:
        """Store the configuration and subscribe to the manager's refreshes.

        Args:
            manager: Object providing ``add_refresh_callback`` and
                ``start_auto_refresh``; failures to attach are logged via
                ``debug`` and otherwise ignored.
            output: Callable that receives each rendered line.
            session_worker_ids: Worker IDs to report. ``None`` disables
                filtering; a set (even an empty one) restricts reporting to
                its members. The set is stored as-is (not copied) and
                finished workers are discarded from it.
            only_terminal_updates: Report only final statuses.
            overlay_mode: Report only final statuses and clear the overlay
                (``output("")``) when nothing is active.
        """
        self.manager = manager
        self.output = output
        self.session_worker_ids = session_worker_ids if session_worker_ids is not None else set()
        self.only_terminal_updates = only_terminal_updates
        self.overlay_mode = overlay_mode
        self._filter_enabled = session_worker_ids is not None
        # worker id -> signature of the last state that was reported.
        self._last_state: Dict[str, str] = {}

        try:
            self.manager.add_refresh_callback(self._on_refresh)
            self.manager.start_auto_refresh()
        except Exception as exc:  # pragma: no cover - best effort
            debug(f"[notifier] Could not attach refresh callback: {exc}")

    @staticmethod
    def _worker_id(worker: Dict[str, Any]) -> str:
        """Return the worker's ID, preferring ``worker_id`` over ``id``.

        Returns an empty string when neither key holds a usable value.
        """
        for key in ("worker_id", "id"):
            candidate = str(worker.get(key) or "").strip()
            if candidate:
                return candidate
        return ""

    @staticmethod
    def _signature(worker: Dict[str, Any], status: str) -> str:
        """Build the status/progress/step string used to detect changes."""
        progress_val = worker.get("progress") or worker.get("progress_percent") or ""
        step = str(worker.get("current_step") or worker.get("description") or "").strip()
        return f"{status}|{progress_val}|{step}"

    def _emit(self, text: str) -> None:
        """Send ``text`` to the output callback without letting it raise."""
        try:
            self.output(text)
        except Exception as exc:
            # Reporting is best effort: a broken sink must not break refreshes.
            debug(f"[notifier] Output callback failed: {exc}")

    def _render_line(self, worker: Dict[str, Any]) -> Optional[str]:
        """Format one worker record, or return ``None`` if it has no ID."""
        worker_id = self._worker_id(worker)
        if not worker_id:
            return None

        status = str(worker.get("status") or "running")
        progress_val = worker.get("progress") or worker.get("progress_percent")
        progress = ""
        if isinstance(progress_val, (int, float)):
            progress = f" {progress_val:.1f}%"
        elif progress_val:
            progress = f" {progress_val}"

        step = str(worker.get("current_step") or worker.get("description") or "").strip()
        parts = [f"[worker:{worker_id}] {status}{progress}"]
        if step:
            parts.append(step)
        return " - ".join(parts)

    def _on_refresh(self, workers: list[Dict[str, Any]]) -> None:
        """Report changes for the given worker records.

        Each distinct state of a worker is emitted at most once. A worker
        reaching a terminal status is always reported (even if it was shown
        as running before) and is then discarded from ``session_worker_ids``.
        Its last signature is kept so that an unfiltered notifier does not
        repeat the same final line on every refresh.
        """
        overlay_active_workers = 0

        for worker in workers:
            worker_id = self._worker_id(worker)
            if not worker_id:
                continue

            # If filtering is enabled, skip workers not in this session.
            if self._filter_enabled and worker_id not in self.session_worker_ids:
                continue

            status = str(worker.get("status") or "running")
            finished = status in self._TERMINAL_STATUSES

            if not finished:
                overlay_active_workers += 1
                # Quiet modes suppress start/progress updates entirely.
                if self.overlay_mode or self.only_terminal_updates:
                    continue

            # Terminal-only mode keys on the bare status so that late changes
            # to progress/step of a finished worker do not re-emit.
            if self.only_terminal_updates and not self.overlay_mode:
                signature = status
            else:
                signature = self._signature(worker, status)

            if self._last_state.get(worker_id) == signature:
                continue
            self._last_state[worker_id] = signature

            line = self._render_line(worker)
            if line:
                self._emit(line)

            if finished:
                # Stop tracking this worker after its terminal notification.
                self.session_worker_ids.discard(worker_id)

        # If nothing is active for this session, clear the overlay text.
        if self.overlay_mode and overlay_active_workers == 0:
            self._emit("")
|
||||
|
||||
|
||||
def ensure_background_notifier(
    manager: Any,
    output: Callable[[str], None] = log,
    session_worker_ids: Optional[Set[str]] = None,
    only_terminal_updates: bool = False,
    overlay_mode: bool = False,
) -> Optional[BackgroundNotifier]:
    """Return the manager's BackgroundNotifier, creating it on first use.

    The notifier is cached on the manager as ``_background_notifier``. When
    one already exists it is reused and only widened: new session IDs are
    merged in, and the terminal-only / overlay flags can be switched on but
    never off. ``output`` is only used when a new notifier is created.

    Args:
        manager: WorkerManager instance; ``None`` yields ``None``.
        output: Function to call for printing updates.
        session_worker_ids: Set of worker IDs belonging to this pipeline
            session. If None, show all workers. If a set (even empty), only
            show workers in that set.
        only_terminal_updates: Report only the final status of each worker.
        overlay_mode: Enable overlay reporting.

    Returns:
        The existing or newly created notifier, or ``None`` without a manager.
    """
    if manager is None:
        return None

    current = getattr(manager, "_background_notifier", None)

    if not isinstance(current, BackgroundNotifier):
        created = BackgroundNotifier(
            manager,
            output,
            session_worker_ids=session_worker_ids,
            only_terminal_updates=only_terminal_updates,
            overlay_mode=overlay_mode,
        )
        try:
            manager._background_notifier = created  # type: ignore[attr-defined]
        except Exception:
            # Caching is best effort; the notifier still works uncached.
            pass
        return created

    # Reuse the attached notifier, applying the more restrictive settings.
    if session_worker_ids is not None:
        current._filter_enabled = True
        current.session_worker_ids.update(session_worker_ids)
    if only_terminal_updates:
        current.only_terminal_updates = True
    if overlay_mode:
        current.overlay_mode = True
    return current
|
||||
223
helper/cmdlet_catalog.py
Normal file
223
helper/cmdlet_catalog.py
Normal file
@@ -0,0 +1,223 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
from cmdlets import REGISTRY
|
||||
except Exception:
|
||||
REGISTRY = {} # type: ignore
|
||||
|
||||
try:
|
||||
from cmdnats import register_native_commands as _register_native_commands
|
||||
except Exception:
|
||||
_register_native_commands = None
|
||||
|
||||
|
||||
def ensure_registry_loaded() -> None:
    """Register native commands into REGISTRY when a registrar is available.

    Does nothing if the native registrar could not be imported or REGISTRY is
    None. Errors raised by the registrar are ignored (best effort).
    NOTE(review): repeated calls are only harmless if the registrar itself is
    idempotent - confirm in cmdnats.
    """
    if not _register_native_commands or REGISTRY is None:
        return
    try:
        _register_native_commands(REGISTRY)
    except Exception:
        # Best effort: a failing registrar must not break catalog lookups.
        return
|
||||
|
||||
|
||||
def _normalize_mod_name(mod_name: str) -> str:
    """Turn a command name into an importable module name.

    Surrounding whitespace is trimmed, leading dots are dropped and dashes
    become underscores. A falsy input yields an empty string.
    """
    text = mod_name if mod_name else ""
    return text.strip().lstrip(".").replace("-", "_")
|
||||
|
||||
|
||||
def import_cmd_module(mod_name: str):
    """Import the module implementing a command, or return None.

    The normalized name is tried as ``cmdnats.<name>``, then
    ``cmdlets.<name>``, then as a bare top-level module. The first import
    that succeeds wins; any exception raised by an attempt moves on to the
    next candidate.
    """
    target = _normalize_mod_name(mod_name)
    if not target:
        return None

    candidates = [f"{pkg}.{target}" for pkg in ("cmdnats", "cmdlets")]
    candidates.append(target)

    for dotted in candidates:
        try:
            return import_module(dotted)
        except Exception:
            # Missing or broken module: fall through to the next candidate.
            continue
    return None
|
||||
|
||||
|
||||
def _normalize_arg(arg: Any) -> Dict[str, Any]:
    """Convert a CmdletArg-like object or a dict into a plain metadata dict.

    Dicts are read by key and any other object by attribute; both go through
    the same defaults so the two input shapes normalize identically. In
    particular a missing or ``None`` name becomes ``""`` in both cases
    (previously a dict with ``"name": None`` produced the string ``"None"``).

    Returns:
        Dict with keys ``name`` (leading dashes stripped), ``type``,
        ``required`` (bool), ``description``, ``choices`` (never ``None``),
        ``alias`` and ``variadic``.
    """
    if isinstance(arg, dict):
        read = arg.get
    else:
        def read(key: str, default: Any = None) -> Any:
            return getattr(arg, key, default)

    return {
        "name": str(read("name", "") or "").lstrip("-"),
        "type": read("type", "string"),
        "required": bool(read("required", False)),
        "description": read("description", ""),
        "choices": read("choices", []) or [],
        "alias": read("alias", ""),
        "variadic": read("variadic", False),
    }
|
||||
|
||||
|
||||
def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
    """Look up a cmdlet's ``CMDLET`` definition and return it normalized.

    The definition is first searched in the module named after the command.
    If that fails, the function registered under the dashed, lower-cased name
    in REGISTRY is consulted and its defining module is searched instead,
    which is how alias names are resolved.

    Returns:
        A dict with ``name``, ``aliases``, ``usage``, ``summary``,
        ``details``, ``args`` (normalized) and ``raw`` (the original
        definition), or ``None`` when no definition is found.
    """
    ensure_registry_loaded()

    module = import_cmd_module(cmd_name.replace("-", "_"))
    spec = getattr(module, "CMDLET", None) if module else None

    if spec is None:
        # Fallback: resolve via the registered function's module (aliases).
        try:
            registered = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
            owner_name = getattr(registered, "__module__", "") if registered else ""
            if owner_name:
                spec = getattr(import_module(owner_name), "CMDLET", None)
        except Exception:
            spec = None

    if not spec:
        return None

    # Plain-dict view of the definition, used when an attribute is missing.
    if hasattr(spec, "to_dict"):
        fallback = spec.to_dict()
    elif isinstance(spec, dict):
        fallback = spec
    else:
        fallback = {}

    def field(key: str, default: Any) -> Any:
        # Attribute on the definition first, then the dict view, then default.
        return getattr(spec, key, fallback.get(key, default))

    display_name = field("name", cmd_name) or cmd_name
    alias_names = field("aliases", []) or []
    arg_specs = field("args", []) or []

    return {
        "name": str(display_name).replace("_", "-").lower(),
        "aliases": [str(alias).replace("_", "-").lower() for alias in alias_names if alias],
        "usage": field("usage", ""),
        "summary": field("summary", ""),
        "details": field("details", []) or [],
        "args": [_normalize_arg(item) for item in arg_specs],
        "raw": spec,
    }
|
||||
|
||||
|
||||
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
    """Collect metadata for all registered cmdlets keyed by canonical name.

    Every name in REGISTRY is resolved with ``get_cmdlet_metadata``. Names
    that resolve to the same canonical cmdlet are merged into one entry: the
    registry name is recorded as an alias when it differs from the canonical
    name, and empty fields are filled from the first metadata that provides
    them. Names without metadata get a blank placeholder entry.

    Returns:
        Mapping of canonical name to a dict with ``name``, ``aliases``
        (sorted), ``usage``, ``summary``, ``details``, ``args`` and ``raw``.
    """
    ensure_registry_loaded()
    entries: Dict[str, Dict[str, Any]] = {}

    # Iterate over a snapshot: get_cmdlet_metadata imports modules, and an
    # import that registers further cmdlets would otherwise resize REGISTRY
    # during iteration and raise RuntimeError.
    for reg_name in list((REGISTRY or {}).keys()):
        meta = get_cmdlet_metadata(reg_name)
        canonical = str(reg_name).replace("_", "-").lower()

        if not meta:
            entries.setdefault(
                canonical,
                {"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
            )
            continue

        canonical = meta.get("name", canonical)
        base = entries.setdefault(
            canonical,
            {
                "name": canonical,
                "aliases": [],
                "usage": "",
                "summary": "",
                "details": [],
                "args": [],
                "raw": meta.get("raw"),
            },
        )

        merged_aliases = set(base.get("aliases", [])) | set(meta.get("aliases", []))
        if canonical != reg_name:
            merged_aliases.add(reg_name)
        base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical)

        # Keep the first non-empty value seen for each descriptive field.
        for key in ("usage", "summary", "details", "args"):
            if not base.get(key) and meta.get(key):
                base[key] = meta[key]
        if not base.get("raw"):
            base["raw"] = meta.get("raw")

    return entries
|
||||
|
||||
|
||||
def list_cmdlet_names(include_aliases: bool = True) -> List[str]:
    """List the names of all known cmdlets in sorted order.

    Args:
        include_aliases: Also include every alias of each cmdlet.

    Returns:
        Sorted, de-duplicated names with empty strings removed.
    """
    ensure_registry_loaded()

    collected = set()
    for entry in list_cmdlet_metadata().values():
        collected.add(entry.get("name", ""))
        if include_aliases:
            collected.update(entry.get("aliases", []))
    return sorted(filter(None, collected))
|
||||
|
||||
|
||||
def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
    """List the command-line flags accepted by a cmdlet (e.g. -name/--name).

    When the raw definition offers ``build_flag_registry()``, all flags from
    that registry are returned sorted and de-duplicated. Otherwise, or if the
    registry cannot be built, flags are derived from the normalized args:
    ``-name`` and ``--name`` for each argument plus ``-alias`` when an alias
    is declared, in declaration order.

    Returns:
        The flag strings, or an empty list for an unknown cmdlet.
    """
    meta = get_cmdlet_metadata(cmd_name)
    if not meta:
        return []

    raw = meta.get("raw")
    if raw and hasattr(raw, "build_flag_registry"):
        try:
            unique = set()
            for flag_set in raw.build_flag_registry().values():
                unique.update(flag_set)
            return sorted(unique)
        except Exception:
            # Registry unavailable: fall back to the declared args below.
            pass

    derived: List[str] = []
    for arg in meta.get("args", []):
        name = arg.get("name")
        if not name:
            continue
        derived += [f"-{name}", f"--{name}"]
        alias = arg.get("alias")
        if alias:
            derived.append(f"-{alias}")
    return derived
|
||||
|
||||
|
||||
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
    """Return the choices declared for one argument of a cmdlet.

    Args:
        cmd_name: Cmdlet name, resolved through ``get_cmdlet_metadata``.
        arg_name: Argument name; leading dashes are ignored.

    Returns:
        A new list with the declared choices of the first matching argument,
        or an empty list if the cmdlet or argument is unknown.
    """
    meta = get_cmdlet_metadata(cmd_name)
    if not meta:
        return []

    wanted = arg_name.lstrip("-")
    found = next((arg for arg in meta.get("args", []) if arg.get("name") == wanted), None)
    if found is None:
        return []
    return list(found.get("choices", []) or [])
|
||||
@@ -28,7 +28,6 @@ from helper.logger import log, debug
|
||||
from .utils import ensure_directory, sha256_file
|
||||
from .http_client import HTTPClient
|
||||
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
|
||||
from hydrus_health_check import get_cookies_file_path
|
||||
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
@@ -145,7 +144,7 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
|
||||
return None
|
||||
|
||||
|
||||
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
"""Download each section separately so merge-file can combine them.
|
||||
|
||||
yt-dlp with multiple --download-sections args merges them into one file.
|
||||
@@ -204,11 +203,14 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
|
||||
info_dict = json.loads(meta_result.stdout.strip())
|
||||
first_section_info = info_dict
|
||||
title_from_first = info_dict.get('title')
|
||||
debug(f"Extracted title from metadata: {title_from_first}")
|
||||
if not quiet:
|
||||
debug(f"Extracted title from metadata: {title_from_first}")
|
||||
except json.JSONDecodeError:
|
||||
debug("Could not parse JSON metadata")
|
||||
if not quiet:
|
||||
debug("Could not parse JSON metadata")
|
||||
except Exception as e:
|
||||
debug(f"Error extracting metadata: {e}")
|
||||
if not quiet:
|
||||
debug(f"Error extracting metadata: {e}")
|
||||
|
||||
# Build yt-dlp command for downloading this section
|
||||
cmd = ["yt-dlp"]
|
||||
@@ -240,8 +242,9 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
|
||||
# Add the URL
|
||||
cmd.append(url)
|
||||
|
||||
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
|
||||
debug(f"Command: {' '.join(cmd)}")
|
||||
if not quiet:
|
||||
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
|
||||
debug(f"Command: {' '.join(cmd)}")
|
||||
|
||||
# Run the subprocess - don't capture output so progress is shown
|
||||
try:
|
||||
@@ -273,13 +276,15 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
"fragment_retries": 10,
|
||||
"http_chunk_size": 10_485_760,
|
||||
"restrictfilenames": True,
|
||||
"progress_hooks": [_progress_callback],
|
||||
"progress_hooks": [] if opts.quiet else [_progress_callback],
|
||||
}
|
||||
|
||||
if opts.cookies_path and opts.cookies_path.is_file():
|
||||
base_options["cookiefile"] = str(opts.cookies_path)
|
||||
else:
|
||||
# Check global cookies file
|
||||
# Check global cookies file lazily to avoid import cycles
|
||||
from hydrus_health_check import get_cookies_file_path # local import
|
||||
|
||||
global_cookies = get_cookies_file_path()
|
||||
if global_cookies:
|
||||
base_options["cookiefile"] = global_cookies
|
||||
@@ -287,7 +292,7 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
# Fallback to browser cookies
|
||||
base_options["cookiesfrombrowser"] = ("chrome",)
|
||||
|
||||
# Add no-playlist option if specified (for single video from playlist URLs)
|
||||
# Add no-playlist option if specified (for single video from playlist url)
|
||||
if opts.no_playlist:
|
||||
base_options["noplaylist"] = True
|
||||
|
||||
@@ -336,7 +341,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
if opts.playlist_items:
|
||||
base_options["playlist_items"] = opts.playlist_items
|
||||
|
||||
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
|
||||
if not opts.quiet:
|
||||
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
|
||||
return base_options
|
||||
|
||||
|
||||
@@ -411,8 +417,8 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
"""Extract the actual download link from LibGen redirect URL.
|
||||
|
||||
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to
|
||||
actual mirror URLs. This follows the redirect chain to get the real file.
|
||||
LibGen url like https://libgen.gl/file.php?id=123456 redirect to
|
||||
actual mirror url. This follows the redirect chain to get the real file.
|
||||
|
||||
Args:
|
||||
libgen_url: LibGen file.php URL
|
||||
@@ -491,6 +497,7 @@ def _download_direct_file(
|
||||
url: str,
|
||||
output_dir: Path,
|
||||
debug_logger: Optional[DebugLogger] = None,
|
||||
quiet: bool = False,
|
||||
) -> DownloadMediaResult:
|
||||
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
|
||||
ensure_directory(output_dir)
|
||||
@@ -535,9 +542,11 @@ def _download_direct_file(
|
||||
extracted_name = match.group(1) or match.group(2)
|
||||
if extracted_name:
|
||||
filename = unquote(extracted_name)
|
||||
debug(f"Filename from Content-Disposition: {filename}")
|
||||
if not quiet:
|
||||
debug(f"Filename from Content-Disposition: {filename}")
|
||||
except Exception as e:
|
||||
log(f"Could not get filename from headers: {e}", file=sys.stderr)
|
||||
if not quiet:
|
||||
log(f"Could not get filename from headers: {e}", file=sys.stderr)
|
||||
|
||||
# Fallback if we still don't have a good filename
|
||||
if not filename or "." not in filename:
|
||||
@@ -546,7 +555,8 @@ def _download_direct_file(
|
||||
file_path = output_dir / filename
|
||||
progress_bar = ProgressBar()
|
||||
|
||||
debug(f"Direct download: {filename}")
|
||||
if not quiet:
|
||||
debug(f"Direct download: {filename}")
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
@@ -577,7 +587,8 @@ def _download_direct_file(
|
||||
speed_str=speed_str,
|
||||
eta_str=eta_str,
|
||||
)
|
||||
debug(progress_line)
|
||||
if not quiet:
|
||||
debug(progress_line)
|
||||
last_progress_time[0] = now
|
||||
|
||||
with HTTPClient(timeout=30.0) as client:
|
||||
@@ -585,7 +596,8 @@ def _download_direct_file(
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
|
||||
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
|
||||
if not quiet:
|
||||
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
|
||||
|
||||
# For direct file downloads, create minimal info dict without filename as title
|
||||
# This prevents creating duplicate title: tags when filename gets auto-generated
|
||||
@@ -658,375 +670,98 @@ def _download_direct_file(
|
||||
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
def probe_url(url: str, no_playlist: bool = False) -> Optional[Dict[str, Any]]:
|
||||
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
|
||||
"""Probe URL to extract metadata WITHOUT downloading.
|
||||
|
||||
Args:
|
||||
url: URL to probe
|
||||
no_playlist: If True, ignore playlists and probe only the single video
|
||||
timeout_seconds: Max seconds to wait for probe (default 15s)
|
||||
|
||||
Returns:
|
||||
Dict with keys: extractor, title, entries (if playlist), duration, etc.
|
||||
Returns None if not supported by yt-dlp.
|
||||
Returns None if not supported by yt-dlp or on timeout.
|
||||
"""
|
||||
if not is_url_supported_by_ytdlp(url):
|
||||
return None
|
||||
|
||||
_ensure_yt_dlp_ready()
|
||||
# Wrap probe in timeout to prevent hanging on large playlists
|
||||
import threading
|
||||
from typing import cast
|
||||
|
||||
assert yt_dlp is not None
|
||||
try:
|
||||
# Extract info without downloading
|
||||
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||
ydl_opts = {
|
||||
"quiet": True, # Suppress all output
|
||||
"no_warnings": True,
|
||||
"socket_timeout": 10,
|
||||
"retries": 3,
|
||||
"skip_download": True, # Don't actually download
|
||||
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||
"noprogress": True, # No progress bars
|
||||
}
|
||||
|
||||
# Add cookies if available
|
||||
global_cookies = get_cookies_file_path()
|
||||
if global_cookies:
|
||||
ydl_opts["cookiefile"] = global_cookies
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
if not isinstance(info, dict):
|
||||
return None
|
||||
|
||||
# Extract relevant fields
|
||||
return {
|
||||
"extractor": info.get("extractor", ""),
|
||||
"title": info.get("title", ""),
|
||||
"entries": info.get("entries", []), # Will be populated if playlist
|
||||
"duration": info.get("duration"),
|
||||
"uploader": info.get("uploader"),
|
||||
"description": info.get("description"),
|
||||
"url": url,
|
||||
}
|
||||
except Exception as exc:
|
||||
log(f"Probe failed for {url}: {exc}")
|
||||
return None
|
||||
|
||||
|
||||
def download_media(
|
||||
opts: DownloadOptions,
|
||||
*,
|
||||
debug_logger: Optional[DebugLogger] = None,
|
||||
) -> DownloadMediaResult:
|
||||
"""Download media from URL using yt-dlp or direct HTTP download.
|
||||
result_container: List[Optional[Any]] = [None, None] # [result, error]
|
||||
|
||||
Args:
|
||||
opts: DownloadOptions with url, mode, output_dir, etc.
|
||||
debug_logger: Optional debug logger for troubleshooting
|
||||
|
||||
Returns:
|
||||
DownloadMediaResult with path, info, tags, hash
|
||||
|
||||
Raises:
|
||||
DownloadError: If download fails
|
||||
"""
|
||||
# Handle LibGen URLs specially
|
||||
# file.php redirects to mirrors, get.php is direct from modern API
|
||||
if 'libgen' in opts.url.lower():
|
||||
if '/get.php' in opts.url.lower():
|
||||
# Modern API get.php links are direct downloads from mirrors (not file redirects)
|
||||
log(f"Detected LibGen get.php URL, downloading directly...")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-direct", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
elif '/file.php' in opts.url.lower():
|
||||
# Old-style file.php redirects to mirrors, we need to resolve
|
||||
log(f"Detected LibGen file.php URL, resolving to actual mirror...")
|
||||
actual_url = _get_libgen_download_url(opts.url)
|
||||
if actual_url and actual_url != opts.url:
|
||||
log(f"Resolved LibGen URL to mirror: {actual_url}")
|
||||
opts.url = actual_url
|
||||
# After resolution, this will typically be an onion link or direct file
|
||||
# Skip yt-dlp for this (it won't support onion/mirrors), go direct
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
else:
|
||||
log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
|
||||
# Handle GoFile shares with a dedicated resolver before yt-dlp/direct fallbacks
|
||||
try:
|
||||
netloc = urlparse(opts.url).netloc.lower()
|
||||
except Exception:
|
||||
netloc = ""
|
||||
if "gofile.io" in netloc:
|
||||
msg = "GoFile links are currently unsupported"
|
||||
debug(msg)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("gofile-unsupported", {"url": opts.url})
|
||||
raise DownloadError(msg)
|
||||
|
||||
# Determine if yt-dlp should be used
|
||||
ytdlp_supported = is_url_supported_by_ytdlp(opts.url)
|
||||
if ytdlp_supported:
|
||||
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist)
|
||||
if probe_result is None:
|
||||
log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
else:
|
||||
log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("direct-file-attempt", {"url": opts.url})
|
||||
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
ytdl_options = _build_ytdlp_options(opts)
|
||||
debug(f"Starting yt-dlp download: {opts.url}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("ytdlp-start", {"url": opts.url})
|
||||
|
||||
assert yt_dlp is not None
|
||||
try:
|
||||
# Debug: show what options we're using
|
||||
if ytdl_options.get("download_sections"):
|
||||
debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}")
|
||||
debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")
|
||||
|
||||
# Use subprocess when download_sections are present (Python API doesn't support them properly)
|
||||
session_id = None
|
||||
first_section_info = {}
|
||||
if ytdl_options.get("download_sections"):
|
||||
session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
|
||||
info = None
|
||||
else:
|
||||
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(opts.url, download=True)
|
||||
except Exception as exc:
|
||||
log(f"yt-dlp failed: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{
|
||||
"phase": "yt-dlp",
|
||||
"error": str(exc),
|
||||
"traceback": traceback.format_exc(),
|
||||
},
|
||||
)
|
||||
raise DownloadError("yt-dlp download failed") from exc
|
||||
|
||||
# If we used subprocess, we need to find the file manually
|
||||
if info is None:
|
||||
# Find files created/modified during this download (after we started)
|
||||
# Look for files matching the expected output template pattern
|
||||
def _do_probe() -> None:
|
||||
try:
|
||||
import glob
|
||||
import time
|
||||
import re
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
# Get the expected filename pattern from outtmpl
|
||||
# For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
|
||||
# For non-sections: "C:\path\title.ext"
|
||||
|
||||
# Wait a moment to ensure files are fully written
|
||||
time.sleep(0.5)
|
||||
|
||||
# List all files in output_dir, sorted by modification time
|
||||
files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
if not files:
|
||||
raise FileNotFoundError(f"No files found in {opts.output_dir}")
|
||||
|
||||
# If we downloaded sections, look for files with the session_id pattern
|
||||
if opts.clip_sections and session_id:
|
||||
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
|
||||
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
|
||||
matching_files = [f for f in files if section_pattern.search(f.name)]
|
||||
|
||||
if matching_files:
|
||||
# Sort by section number to ensure correct order
|
||||
def extract_section_num(path: Path) -> int:
|
||||
match = section_pattern.search(path.name)
|
||||
return int(match.group(1)) if match else 999
|
||||
|
||||
matching_files.sort(key=extract_section_num)
|
||||
debug(f"Found {len(matching_files)} section file(s) matching pattern")
|
||||
|
||||
# Now rename section files to use hash-based names
|
||||
# This ensures unique filenames for each section content
|
||||
renamed_files = []
|
||||
|
||||
for idx, section_file in enumerate(matching_files, 1):
|
||||
try:
|
||||
# Calculate hash for the file
|
||||
file_hash = sha256_file(section_file)
|
||||
ext = section_file.suffix
|
||||
new_name = f"{file_hash}{ext}"
|
||||
new_path = opts.output_dir / new_name
|
||||
|
||||
if new_path.exists() and new_path != section_file:
|
||||
# If file with same hash exists, use it and delete the temp one
|
||||
debug(f"File with hash {file_hash} already exists, using existing file.")
|
||||
try:
|
||||
section_file.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
renamed_files.append(new_path)
|
||||
else:
|
||||
section_file.rename(new_path)
|
||||
debug(f"Renamed section file: {section_file.name} → {new_name}")
|
||||
renamed_files.append(new_path)
|
||||
except Exception as e:
|
||||
debug(f"Failed to process section file {section_file.name}: {e}")
|
||||
renamed_files.append(section_file)
|
||||
|
||||
media_path = renamed_files[0]
|
||||
media_paths = renamed_files
|
||||
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
|
||||
else:
|
||||
# Fallback to most recent file if pattern not found
|
||||
media_path = files[0]
|
||||
media_paths = None
|
||||
debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
|
||||
else:
|
||||
# No sections, just take the most recent file
|
||||
media_path = files[0]
|
||||
media_paths = None
|
||||
|
||||
debug(f"✓ Downloaded: {media_path.name}")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)})
|
||||
except Exception as exc:
|
||||
log(f"Error finding downloaded file: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "find-file", "error": str(exc)},
|
||||
)
|
||||
raise DownloadError(str(exc)) from exc
|
||||
|
||||
# Create result with minimal data extracted from filename
|
||||
file_hash = sha256_file(media_path)
|
||||
|
||||
# For section downloads, create tags with the title and build proper info dict
|
||||
tags = []
|
||||
title = ''
|
||||
if first_section_info:
|
||||
title = first_section_info.get('title', '')
|
||||
if title:
|
||||
tags.append(f'title:{title}')
|
||||
debug(f"Added title tag for section download: {title}")
|
||||
|
||||
# Build info dict - always use extracted title if available, not hash
|
||||
if first_section_info:
|
||||
info_dict = first_section_info
|
||||
else:
|
||||
info_dict = {
|
||||
"id": media_path.stem,
|
||||
"title": title or media_path.stem,
|
||||
"ext": media_path.suffix.lstrip(".")
|
||||
assert yt_dlp is not None
|
||||
# Extract info without downloading
|
||||
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||
ydl_opts = {
|
||||
"quiet": True, # Suppress all output
|
||||
"no_warnings": True,
|
||||
"socket_timeout": 10,
|
||||
"retries": 2, # Reduce retries for faster timeout
|
||||
"skip_download": True, # Don't actually download
|
||||
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||
"noprogress": True, # No progress bars
|
||||
}
|
||||
|
||||
return DownloadMediaResult(
|
||||
path=media_path,
|
||||
info=info_dict,
|
||||
tags=tags,
|
||||
source_url=opts.url,
|
||||
hash_value=file_hash,
|
||||
paths=media_paths, # Include all section files if present
|
||||
)
|
||||
|
||||
# Add cookies if available (lazy import to avoid circular dependency)
|
||||
from hydrus_health_check import get_cookies_file_path # local import
|
||||
|
||||
if not isinstance(info, dict):
|
||||
log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
|
||||
raise DownloadError("Unexpected yt-dlp response type")
|
||||
|
||||
info_dict: Dict[str, Any] = info
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"ytdlp-info",
|
||||
{
|
||||
"keys": sorted(info_dict.keys()),
|
||||
"is_playlist": bool(info_dict.get("entries")),
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
|
||||
except FileNotFoundError as exc:
|
||||
log(f"Error: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "resolve-path", "error": str(exc)},
|
||||
)
|
||||
raise DownloadError(str(exc)) from exc
|
||||
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"resolved-media",
|
||||
{"path": str(media_path), "entry_keys": sorted(entry.keys())},
|
||||
)
|
||||
|
||||
# Extract hash from metadata or compute
|
||||
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
|
||||
if not hash_value:
|
||||
try:
|
||||
hash_value = sha256_file(media_path)
|
||||
except OSError as exc:
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"hash-error",
|
||||
{"path": str(media_path), "error": str(exc)},
|
||||
)
|
||||
|
||||
# Extract tags using metadata.py
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(entry)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||
|
||||
source_url = (
|
||||
entry.get("webpage_url")
|
||||
or entry.get("original_url")
|
||||
or entry.get("url")
|
||||
)
|
||||
|
||||
debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"downloaded",
|
||||
{
|
||||
"path": str(media_path),
|
||||
"tag_count": len(tags),
|
||||
"source_url": source_url,
|
||||
"sha256": hash_value,
|
||||
},
|
||||
)
|
||||
|
||||
return DownloadMediaResult(
|
||||
path=media_path,
|
||||
info=entry,
|
||||
tags=tags,
|
||||
source_url=source_url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
global_cookies = get_cookies_file_path()
|
||||
if global_cookies:
|
||||
ydl_opts["cookiefile"] = global_cookies
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
if not isinstance(info, dict):
|
||||
result_container[0] = None
|
||||
return
|
||||
|
||||
# Extract relevant fields
|
||||
result_container[0] = {
|
||||
"extractor": info.get("extractor", ""),
|
||||
"title": info.get("title", ""),
|
||||
"entries": info.get("entries", []), # Will be populated if playlist
|
||||
"duration": info.get("duration"),
|
||||
"uploader": info.get("uploader"),
|
||||
"description": info.get("description"),
|
||||
"url": url,
|
||||
}
|
||||
except Exception as exc:
|
||||
log(f"Probe error for {url}: {exc}")
|
||||
result_container[1] = exc
|
||||
|
||||
thread = threading.Thread(target=_do_probe, daemon=False)
|
||||
thread.start()
|
||||
thread.join(timeout=timeout_seconds)
|
||||
|
||||
if thread.is_alive():
|
||||
# Probe timed out - return None to fall back to direct download
|
||||
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
|
||||
return None
|
||||
|
||||
if result_container[1] is not None:
|
||||
# Probe error - return None to proceed anyway
|
||||
return None
|
||||
|
||||
return cast(Optional[Dict[str, Any]], result_container[0])
|
||||
|
||||
|
||||
__all__ = [
|
||||
"download_media",
|
||||
"is_url_supported_by_ytdlp",
|
||||
"list_formats",
|
||||
"probe_url",
|
||||
"DownloadError",
|
||||
"DownloadOptions",
|
||||
"DownloadMediaResult",
|
||||
]
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -73,7 +73,7 @@ class HydrusRequestSpec:
|
||||
class HydrusClient:
|
||||
"""Thin wrapper around the Hydrus Client API."""
|
||||
|
||||
base_url: str
|
||||
url: str
|
||||
access_key: str = ""
|
||||
timeout: float = 60.0
|
||||
|
||||
@@ -84,10 +84,10 @@ class HydrusClient:
|
||||
_session_key: str = field(init=False, default="", repr=False) # Cached session key
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.base_url:
|
||||
if not self.url:
|
||||
raise ValueError("Hydrus base URL is required")
|
||||
self.base_url = self.base_url.rstrip("/")
|
||||
parsed = urlsplit(self.base_url)
|
||||
self.url = self.url.rstrip("/")
|
||||
parsed = urlsplit(self.url)
|
||||
if parsed.scheme not in {"http", "https"}:
|
||||
raise ValueError("Hydrus base URL must use http or https")
|
||||
self.scheme = parsed.scheme
|
||||
@@ -374,24 +374,24 @@ class HydrusClient:
|
||||
hashes = self._ensure_hashes(file_hashes)
|
||||
if len(hashes) == 1:
|
||||
body = {"hash": hashes[0], "url_to_add": url}
|
||||
return self._post("/add_urls/associate_url", data=body)
|
||||
return self._post("/add_url/associate_url", data=body)
|
||||
|
||||
results: dict[str, Any] = {}
|
||||
for file_hash in hashes:
|
||||
body = {"hash": file_hash, "url_to_add": url}
|
||||
results[file_hash] = self._post("/add_urls/associate_url", data=body)
|
||||
results[file_hash] = self._post("/add_url/associate_url", data=body)
|
||||
return {"batched": results}
|
||||
|
||||
def delete_url(self, file_hashes: Union[str, Iterable[str]], url: str) -> dict[str, Any]:
|
||||
hashes = self._ensure_hashes(file_hashes)
|
||||
if len(hashes) == 1:
|
||||
body = {"hash": hashes[0], "url_to_delete": url}
|
||||
return self._post("/add_urls/associate_url", data=body)
|
||||
return self._post("/add_url/associate_url", data=body)
|
||||
|
||||
results: dict[str, Any] = {}
|
||||
for file_hash in hashes:
|
||||
body = {"hash": file_hash, "url_to_delete": url}
|
||||
results[file_hash] = self._post("/add_urls/associate_url", data=body)
|
||||
results[file_hash] = self._post("/add_url/associate_url", data=body)
|
||||
return {"batched": results}
|
||||
|
||||
def set_notes(self, file_hashes: Union[str, Iterable[str]], notes: dict[str, str], service_name: str) -> dict[str, Any]:
|
||||
@@ -517,7 +517,7 @@ class HydrusClient:
|
||||
file_ids: Sequence[int] | None = None,
|
||||
hashes: Sequence[str] | None = None,
|
||||
include_service_keys_to_tags: bool = True,
|
||||
include_file_urls: bool = False,
|
||||
include_file_url: bool = False,
|
||||
include_duration: bool = True,
|
||||
include_size: bool = True,
|
||||
include_mime: bool = False,
|
||||
@@ -535,7 +535,7 @@ class HydrusClient:
|
||||
include_service_keys_to_tags,
|
||||
lambda v: "true" if v else None,
|
||||
),
|
||||
("include_file_urls", include_file_urls, lambda v: "true" if v else None),
|
||||
("include_file_url", include_file_url, lambda v: "true" if v else None),
|
||||
("include_duration", include_duration, lambda v: "true" if v else None),
|
||||
("include_size", include_size, lambda v: "true" if v else None),
|
||||
("include_mime", include_mime, lambda v: "true" if v else None),
|
||||
@@ -559,13 +559,13 @@ class HydrusClient:
|
||||
def file_url(self, file_hash: str) -> str:
|
||||
hash_param = quote(file_hash)
|
||||
# Don't append access_key parameter for file downloads - use header instead
|
||||
url = f"{self.base_url}/get_files/file?hash={hash_param}"
|
||||
url = f"{self.url}/get_files/file?hash={hash_param}"
|
||||
return url
|
||||
|
||||
def thumbnail_url(self, file_hash: str) -> str:
|
||||
hash_param = quote(file_hash)
|
||||
# Don't append access_key parameter for file downloads - use header instead
|
||||
url = f"{self.base_url}/get_files/thumbnail?hash={hash_param}"
|
||||
url = f"{self.url}/get_files/thumbnail?hash={hash_param}"
|
||||
return url
|
||||
|
||||
|
||||
@@ -612,7 +612,7 @@ def hydrus_request(args, parser) -> int:
|
||||
|
||||
parsed = urlsplit(options.url)
|
||||
if parsed.scheme not in ('http', 'https'):
|
||||
parser.error('Only http and https URLs are supported')
|
||||
parser.error('Only http and https url are supported')
|
||||
if not parsed.hostname:
|
||||
parser.error('Invalid Hydrus URL')
|
||||
|
||||
@@ -1064,7 +1064,7 @@ def hydrus_export(args, _parser) -> int:
|
||||
file_hash = getattr(args, 'file_hash', None) or _extract_hash(args.file_url)
|
||||
if hydrus_url and file_hash:
|
||||
try:
|
||||
client = HydrusClient(base_url=hydrus_url, access_key=args.access_key, timeout=args.timeout)
|
||||
client = HydrusClient(url=hydrus_url, access_key=args.access_key, timeout=args.timeout)
|
||||
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
|
||||
entries = meta_response.get('metadata') if isinstance(meta_response, dict) else None
|
||||
if isinstance(entries, list) and entries:
|
||||
@@ -1301,8 +1301,7 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
|
||||
|
||||
Performs a lightweight probe to verify:
|
||||
- Hydrus URL is configured
|
||||
- Hydrus client library is available
|
||||
- Can connect to Hydrus and retrieve services
|
||||
- Can connect to Hydrus URL/port
|
||||
|
||||
Results are cached per session unless use_cache=False.
|
||||
|
||||
@@ -1330,50 +1329,43 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
|
||||
return False, reason
|
||||
|
||||
access_key = get_hydrus_access_key(config, "home") or ""
|
||||
if not access_key:
|
||||
reason = "Hydrus access key not configured"
|
||||
_HYDRUS_AVAILABLE = False
|
||||
_HYDRUS_UNAVAILABLE_REASON = reason
|
||||
return False, reason
|
||||
|
||||
timeout_raw = config.get("HydrusNetwork_Request_Timeout")
|
||||
try:
|
||||
timeout = float(timeout_raw) if timeout_raw is not None else 10.0
|
||||
timeout = float(timeout_raw) if timeout_raw is not None else 5.0
|
||||
except (TypeError, ValueError):
|
||||
timeout = 10.0
|
||||
timeout = 5.0
|
||||
|
||||
try:
|
||||
# Use HTTPClient directly to avoid session key logic and reduce retries
|
||||
# This prevents log spam when Hydrus is offline (avoiding 3 retries x 2 requests)
|
||||
from helper.http_client import HTTPClient
|
||||
# Simple TCP connection test to URL/port
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
probe_url = f"{url.rstrip('/')}/get_services"
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname or 'localhost'
|
||||
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
|
||||
|
||||
headers = {}
|
||||
if access_key:
|
||||
headers["Hydrus-Client-API-Access-Key"] = access_key
|
||||
|
||||
# Suppress HTTPClient logging during probe to avoid "Request failed" logs on startup
|
||||
http_logger = logging.getLogger("helper.http_client")
|
||||
original_level = http_logger.level
|
||||
http_logger.setLevel(logging.CRITICAL)
|
||||
|
||||
# Try to connect to the host/port
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.settimeout(timeout)
|
||||
try:
|
||||
# Use retries=1 (single attempt, no retry) to fail fast
|
||||
with HTTPClient(timeout=timeout, retries=1, headers=headers, verify_ssl=False) as http:
|
||||
try:
|
||||
response = http.get(probe_url)
|
||||
if response.status_code == 200:
|
||||
_HYDRUS_AVAILABLE = True
|
||||
_HYDRUS_UNAVAILABLE_REASON = None
|
||||
return True, None
|
||||
else:
|
||||
# Even if we get a 4xx/5xx, the service is "reachable" but maybe auth failed
|
||||
# But for "availability" we usually mean "usable".
|
||||
# If auth fails (403), we can't use it, so return False.
|
||||
reason = f"HTTP {response.status_code}: {response.reason_phrase}"
|
||||
_HYDRUS_AVAILABLE = False
|
||||
_HYDRUS_UNAVAILABLE_REASON = reason
|
||||
return False, reason
|
||||
except Exception as e:
|
||||
# This catches connection errors from HTTPClient
|
||||
raise e
|
||||
result = sock.connect_ex((hostname, port))
|
||||
if result == 0:
|
||||
_HYDRUS_AVAILABLE = True
|
||||
_HYDRUS_UNAVAILABLE_REASON = None
|
||||
return True, None
|
||||
else:
|
||||
reason = f"Cannot connect to {hostname}:{port}"
|
||||
_HYDRUS_AVAILABLE = False
|
||||
_HYDRUS_UNAVAILABLE_REASON = reason
|
||||
return False, reason
|
||||
finally:
|
||||
http_logger.setLevel(original_level)
|
||||
sock.close()
|
||||
|
||||
except Exception as exc:
|
||||
reason = str(exc)
|
||||
|
||||
@@ -2,15 +2,29 @@
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
_DEBUG_ENABLED = False
|
||||
_thread_local = threading.local()
|
||||
|
||||
def set_thread_stream(stream):
    """Set a custom output stream for the current thread.

    The stream is stored on the module-level ``threading.local`` object, so
    the assignment is only visible to the thread that made it.

    Args:
        stream: Object to store as this thread's stream.
    """
    _thread_local.stream = stream
|
||||
|
||||
def get_thread_stream():
    """Get the custom output stream for the current thread, if any.

    Returns:
        The value previously stored by ``set_thread_stream`` in this thread,
        or ``None`` when the thread has not set one.
    """
    # getattr with a default: a thread that never set a stream has no
    # ``stream`` attribute on the thread-local object.
    return getattr(_thread_local, 'stream', None)
|
||||
|
||||
def set_debug(enabled: bool) -> None:
    """Enable or disable debug logging.

    Args:
        enabled: New value for the module-wide ``_DEBUG_ENABLED`` flag.
    """
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled
|
||||
|
||||
def is_debug_enabled() -> bool:
    """Check if debug logging is enabled.

    Returns:
        The current value of the module-wide ``_DEBUG_ENABLED`` flag.
    """
    return _DEBUG_ENABLED
|
||||
|
||||
def debug(*args, **kwargs) -> None:
|
||||
"""Print debug message if debug logging is enabled.
|
||||
|
||||
@@ -18,9 +32,22 @@ def debug(*args, **kwargs) -> None:
|
||||
"""
|
||||
if not _DEBUG_ENABLED:
|
||||
return
|
||||
|
||||
# Check if stderr has been redirected to /dev/null (quiet mode)
|
||||
# If so, skip output to avoid queuing in background worker's capture
|
||||
try:
|
||||
stderr_name = getattr(sys.stderr, 'name', '')
|
||||
if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check for thread-local stream first
|
||||
stream = get_thread_stream()
|
||||
if stream:
|
||||
kwargs['file'] = stream
|
||||
# Set default to stderr for debug messages
|
||||
if 'file' not in kwargs:
|
||||
elif 'file' not in kwargs:
|
||||
kwargs['file'] = sys.stderr
|
||||
|
||||
# Prepend DEBUG label
|
||||
@@ -59,8 +86,12 @@ def log(*args, **kwargs) -> None:
|
||||
# Get function name
|
||||
func_name = caller_frame.f_code.co_name
|
||||
|
||||
# Check for thread-local stream first
|
||||
stream = get_thread_stream()
|
||||
if stream:
|
||||
kwargs['file'] = stream
|
||||
# Set default to stdout if not specified
|
||||
if 'file' not in kwargs:
|
||||
elif 'file' not in kwargs:
|
||||
kwargs['file'] = sys.stdout
|
||||
|
||||
if add_prefix:
|
||||
|
||||
@@ -96,7 +96,7 @@ class MPVfile:
|
||||
relationship_metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
original_tags: Dict[str, str] = field(default_factory=dict)
|
||||
known_urls: List[str] = field(default_factory=list)
|
||||
url: List[str] = field(default_factory=list)
|
||||
title: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
clip_time: Optional[str] = None
|
||||
@@ -128,7 +128,7 @@ class MPVfile:
|
||||
"relationship_metadata": self.relationship_metadata,
|
||||
"tags": self.tags,
|
||||
"original_tags": self.original_tags,
|
||||
"known_urls": self.known_urls,
|
||||
"url": self.url,
|
||||
"title": self.title,
|
||||
"source_url": self.source_url,
|
||||
"clip_time": self.clip_time,
|
||||
@@ -293,10 +293,10 @@ class MPVFileBuilder:
|
||||
if s.tags:
|
||||
s.original_tags = {tag: tag for tag in s.tags}
|
||||
|
||||
# known URLs + last_url
|
||||
s.known_urls = _normalise_string_list(p.get("known_urls"))
|
||||
if self.last_url and self.last_url not in s.known_urls:
|
||||
s.known_urls.append(self.last_url)
|
||||
# known url + last_url
|
||||
s.url = _normalise_string_list(p.get("url"))
|
||||
if self.last_url and self.last_url not in s.url:
|
||||
s.url.append(self.last_url)
|
||||
|
||||
# source URL (explicit or fallback to last_url)
|
||||
explicit_source = p.get("source_url")
|
||||
@@ -500,8 +500,8 @@ class MPVFileBuilder:
|
||||
self._apply_hydrus_result(result)
|
||||
self.state.type = "hydrus"
|
||||
matched_url = result.get("matched_url") or result.get("url")
|
||||
if matched_url and matched_url not in self.state.known_urls:
|
||||
self.state.known_urls.append(matched_url)
|
||||
if matched_url and matched_url not in self.state.url:
|
||||
self.state.url.append(matched_url)
|
||||
# Enrich relationships once we know the hash
|
||||
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||
self._enrich_relationships_from_api(self.state.hash)
|
||||
@@ -527,7 +527,7 @@ class MPVFileBuilder:
|
||||
metadata_payload["type"] = "other"
|
||||
self.state.metadata = metadata_payload
|
||||
# Do NOT overwrite MPVfile.type with metadata.type
|
||||
self._merge_known_urls(metadata_payload.get("known_urls") or metadata_payload.get("known_urls_set"))
|
||||
self._merge_url(metadata_payload.get("url") or metadata_payload.get("url_set"))
|
||||
source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url")
|
||||
if source_url and not self.state.source_url:
|
||||
self.state.source_url = self._normalise_url(source_url)
|
||||
@@ -722,7 +722,7 @@ class MPVFileBuilder:
|
||||
include_service_keys_to_tags=True,
|
||||
include_duration=True,
|
||||
include_size=True,
|
||||
include_file_urls=False,
|
||||
include_file_url=False,
|
||||
include_mime=False,
|
||||
)
|
||||
except HydrusRequestError as hre: # pragma: no cover
|
||||
@@ -801,11 +801,11 @@ class MPVFileBuilder:
|
||||
if tag not in self.state.original_tags:
|
||||
self.state.original_tags[tag] = tag
|
||||
|
||||
def _merge_known_urls(self, urls: Optional[Iterable[Any]]) -> None:
|
||||
if not urls:
|
||||
def _merge_url(self, url: Optional[Iterable[Any]]) -> None:
|
||||
if not url:
|
||||
return
|
||||
combined = list(self.state.known_urls or []) + _normalise_string_list(urls)
|
||||
self.state.known_urls = unique_preserve_order(combined)
|
||||
combined = list(self.state.url or []) + _normalise_string_list(url)
|
||||
self.state.url = unique_preserve_order(combined)
|
||||
|
||||
def _load_sidecar_tags(self, local_path: str) -> None:
|
||||
try:
|
||||
@@ -821,7 +821,7 @@ class MPVFileBuilder:
|
||||
if hash_value and not self.state.hash and _looks_like_hash(hash_value):
|
||||
self.state.hash = hash_value.lower()
|
||||
self._merge_tags(tags)
|
||||
self._merge_known_urls(known)
|
||||
self._merge_url(known)
|
||||
break
|
||||
|
||||
def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
|
||||
@@ -831,7 +831,7 @@ class MPVFileBuilder:
|
||||
return None, [], []
|
||||
hash_value: Optional[str] = None
|
||||
tags: List[str] = []
|
||||
known_urls: List[str] = []
|
||||
url: List[str] = []
|
||||
for line in raw.splitlines():
|
||||
trimmed = line.strip()
|
||||
if not trimmed:
|
||||
@@ -841,13 +841,13 @@ class MPVFileBuilder:
|
||||
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||
if candidate:
|
||||
hash_value = candidate
|
||||
elif lowered.startswith("known_url:") or lowered.startswith("url:"):
|
||||
elif lowered.startswith("url:") or lowered.startswith("url:"):
|
||||
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||
if candidate:
|
||||
known_urls.append(candidate)
|
||||
url.append(candidate)
|
||||
else:
|
||||
tags.append(trimmed)
|
||||
return hash_value, tags, known_urls
|
||||
return hash_value, tags, url
|
||||
|
||||
def _compute_local_hash(self, local_path: str) -> None:
|
||||
try:
|
||||
@@ -864,8 +864,8 @@ class MPVFileBuilder:
|
||||
def _finalise(self) -> None:
|
||||
if self.state.tags:
|
||||
self.state.tags = unique_preserve_order(self.state.tags)
|
||||
if self.state.known_urls:
|
||||
self.state.known_urls = unique_preserve_order(self.state.known_urls)
|
||||
if self.state.url:
|
||||
self.state.url = unique_preserve_order(self.state.url)
|
||||
# Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type
|
||||
if not self.state.title:
|
||||
if self.state.metadata.get("title"):
|
||||
|
||||
@@ -85,7 +85,7 @@ def _normalize_target(text: Optional[str]) -> Optional[str]:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Normalize paths/urls for comparison
|
||||
# Normalize paths/url for comparison
|
||||
return lower.replace('\\', '\\')
|
||||
|
||||
|
||||
|
||||
818
helper/provider.py
Normal file
818
helper/provider.py
Normal file
@@ -0,0 +1,818 @@
|
||||
"""Provider interfaces for search and file upload functionality.
|
||||
|
||||
This module defines two distinct provider types:
|
||||
1. SearchProvider: For searching content (books, music, videos, games)
|
||||
2. FileProvider: For uploading files to hosting services
|
||||
|
||||
No legacy code or backwards compatibility - clean, single source of truth.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import asyncio
|
||||
import subprocess
|
||||
import shutil
|
||||
import mimetypes
|
||||
import traceback
|
||||
import requests
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
PLAYWRIGHT_AVAILABLE = True
|
||||
except ImportError:
|
||||
PLAYWRIGHT_AVAILABLE = False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SEARCH PROVIDERS
|
||||
# ============================================================================
|
||||
|
||||
@dataclass
class SearchResult:
    """Unified search result format across all search providers."""

    origin: str  # Provider name: "libgen", "soulseek", "debrid", "bandcamp", etc.
    title: str  # Display title/filename
    path: str  # Download target (URL, path, magnet, identifier)

    detail: str = ""  # Additional description
    annotations: List[str] = field(default_factory=list)  # Tags: ["120MB", "flac", "ready"]
    media_kind: str = "other"  # Type: "book", "audio", "video", "game", "magnet"
    size_bytes: Optional[int] = None
    tags: set[str] = field(default_factory=set)  # Searchable tags
    columns: List[Tuple[str, str]] = field(default_factory=list)  # Display columns
    full_metadata: Dict[str, Any] = field(default_factory=dict)  # Extra metadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for pipeline processing.

        Returns:
            A dict with one key per field, in declaration order. ``tags`` is
            converted from a set to a new list and ``columns`` is a new list;
            ``annotations`` and ``full_metadata`` are the same objects held
            by this instance (not copies).
        """
        return {
            "origin": self.origin,
            "title": self.title,
            "path": self.path,
            "detail": self.detail,
            "annotations": self.annotations,
            "media_kind": self.media_kind,
            "size_bytes": self.size_bytes,
            "tags": list(self.tags),
            "columns": list(self.columns),
            "full_metadata": self.full_metadata,
        }
|
||||
|
||||
|
||||
class SearchProvider(ABC):
    """Base class for search providers."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Store the provider configuration and derive the provider name.

        Args:
            config: Provider settings. ``None`` (or any other falsy value)
                is replaced by a fresh empty dict.
        """
        self.config = config or {}
        # Lower-cased concrete class name, e.g. ``Libgen`` -> "libgen".
        self.name = self.__class__.__name__.lower()

    @abstractmethod
    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> List[SearchResult]:
        """Search for items matching the query.

        Args:
            query: Search query string
            limit: Maximum results to return
            filters: Optional filtering criteria
            **kwargs: Provider-specific arguments

        Returns:
            List of SearchResult objects
        """

    def validate(self) -> bool:
        """Check if provider is available and properly configured.

        The base implementation always returns ``True``; subclasses override
        it when they have something to check.
        """
        return True
|
||||
|
||||
|
||||
class Libgen(SearchProvider):
    """Search provider for Library Genesis books."""

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> List[SearchResult]:
        """Search Library Genesis and return the hits as ``SearchResult`` objects.

        The query is parsed with ``helper.query_parser.parse_query``. The
        first non-empty value among the ``isbn``, ``title`` and ``author``
        fields and the free text is sent to
        ``UnifiedBookDownloader.search_libgen``; the raw query is the last
        resort.

        Args:
            query: Search query string.
            limit: Passed to the downloader as the maximum number of results.
            filters: Accepted for interface compatibility; not used here.
            **kwargs: Ignored.

        Returns:
            One ``SearchResult`` per book returned by the downloader, or an
            empty list if anything raises (the error is logged to stderr).
        """
        try:
            # Imported lazily so a missing helper is reported as a search
            # error instead of breaking module import.
            from helper.unified_book_downloader import UnifiedBookDownloader
            from helper.query_parser import parse_query, get_field, get_free_text

            parsed = parse_query(query)
            isbn_term = get_field(parsed, 'isbn')
            author_term = get_field(parsed, 'author')
            title_term = get_field(parsed, 'title')
            free_text = get_free_text(parsed)

            search_query = isbn_term or title_term or author_term or free_text or query

            downloader = UnifiedBookDownloader(config=self.config)
            books = downloader.search_libgen(search_query, limit=limit)

            return [
                self._book_to_result(idx, book)
                for idx, book in enumerate(books, 1)
            ]

        except Exception as e:
            # Best-effort provider: any failure yields "no results".
            log(f"[libgen] Search error: {e}", file=sys.stderr)
            return []

    @staticmethod
    def _book_to_result(idx: int, book: Dict[str, Any]) -> SearchResult:
        """Convert one downloader book record into a ``SearchResult``.

        Args:
            idx: 1-based position of the book in the result list.
            book: Mapping returned by the downloader for a single book.
        """
        title = book.get("title", "Unknown")
        author = book.get("author", "Unknown")
        year = book.get("year", "Unknown")
        pages = book.get("pages") or book.get("pages_str") or ""
        extension = book.get("extension", "") or book.get("ext", "")
        filesize = book.get("filesize_str", "Unknown")
        isbn = book.get("isbn", "")
        mirror_url = book.get("mirror_url", "")

        columns = [
            ("Title", title),
            ("Author", author),
            ("Pages", str(pages)),
            ("Ext", str(extension)),
        ]

        detail = f"By: {author}"
        if year and year != "Unknown":
            detail += f" ({year})"

        annotations = [f"{filesize}"]
        if isbn:
            annotations.append(f"ISBN: {isbn}")

        return SearchResult(
            origin="libgen",
            title=title,
            # Fall back to a synthetic identifier when no mirror URL is known.
            path=mirror_url or f"libgen:{book.get('id', '')}",
            detail=detail,
            annotations=annotations,
            media_kind="book",
            columns=columns,
            full_metadata={
                "number": idx,
                "author": author,
                "year": year,
                "isbn": isbn,
                "filesize": filesize,
                "pages": pages,
                "extension": extension,
                "book_id": book.get("book_id", ""),
                "md5": book.get("md5", ""),
            },
        )

    def validate(self) -> bool:
        """Return True when ``UnifiedBookDownloader`` can be imported."""
        try:
            from helper.unified_book_downloader import UnifiedBookDownloader  # noqa: F401
            return True
        except Exception:
            return False
|
||||
|
||||
|
||||
class Soulseek(SearchProvider):
|
||||
"""Search provider for Soulseek P2P network."""
|
||||
|
||||
MUSIC_EXTENSIONS = {
|
||||
'.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus',
|
||||
'.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf',
|
||||
'.dff', '.wv', '.tta', '.tak', '.ac3', '.dts'
|
||||
}
|
||||
|
||||
USERNAME = "asjhkjljhkjfdsd334"
|
||||
PASSWORD = "khhhg"
|
||||
DOWNLOAD_DIR = "./downloads"
|
||||
MAX_WAIT_TRANSFER = 1200
|
||||
|
||||
async def perform_search(
    self,
    query: str,
    timeout: float = 9.0,
    limit: int = 50
) -> List[Dict[str, Any]]:
    """Perform async Soulseek search.

    Creates the download directory, logs in with the class-level
    credentials, issues one search and collects responses for ``timeout``
    seconds.

    Args:
        query: Search text sent to the network.
        timeout: Seconds to keep collecting responses.
        limit: Maximum number of flattened entries to return.

    Returns:
        The first ``limit`` entries produced by ``_flatten_results``, or an
        empty list when login or the search fails (the error is logged to
        stderr).
    """
    import os
    from aioslsk.client import SoulSeekClient
    from aioslsk.settings import Settings, CredentialsSettings

    os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)

    settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD))
    client = SoulSeekClient(settings)

    try:
        try:
            await client.start()
            await client.login()
        except Exception as e:
            log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr)
            return []

        try:
            search_request = await client.searches.search(query)
            await self._collect_results(client, search_request, timeout=timeout)
            return self._flatten_results(search_request)[:limit]
        except Exception as e:
            log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr)
            return []
    finally:
        # Always stop the client, including when start() succeeded but
        # login() failed; otherwise the started client is never released.
        try:
            await client.stop()
        except Exception:
            # Shutdown is best-effort; a failure here must not mask the result.
            pass
|
||||
|
||||
def _flatten_results(self, search_request) -> List[dict]:
|
||||
flat = []
|
||||
for result in search_request.results:
|
||||
username = getattr(result, "username", "?")
|
||||
|
||||
for file_data in getattr(result, "shared_items", []):
|
||||
flat.append({
|
||||
"file": file_data,
|
||||
"username": username,
|
||||
"filename": getattr(file_data, "filename", "?"),
|
||||
"size": getattr(file_data, "filesize", 0),
|
||||
})
|
||||
|
||||
for file_data in getattr(result, "locked_results", []):
|
||||
flat.append({
|
||||
"file": file_data,
|
||||
"username": username,
|
||||
"filename": getattr(file_data, "filename", "?"),
|
||||
"size": getattr(file_data, "filesize", 0),
|
||||
})
|
||||
|
||||
return flat
|
||||
|
||||
async def _collect_results(self, client, search_request, timeout: float = 75.0) -> None:
|
||||
end = time.time() + timeout
|
||||
last_count = 0
|
||||
while time.time() < end:
|
||||
current_count = len(search_request.results)
|
||||
if current_count > last_count:
|
||||
debug(f"[soulseek] Got {current_count} result(s)...")
|
||||
last_count = current_count
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
filters = filters or {}
|
||||
|
||||
try:
|
||||
flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit))
|
||||
|
||||
if not flat_results:
|
||||
return []
|
||||
|
||||
# Filter to music files only
|
||||
music_results = []
|
||||
for item in flat_results:
|
||||
filename = item['filename']
|
||||
ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
|
||||
if ext in self.MUSIC_EXTENSIONS:
|
||||
music_results.append(item)
|
||||
|
||||
if not music_results:
|
||||
return []
|
||||
|
||||
# Extract metadata
|
||||
enriched_results = []
|
||||
for item in music_results:
|
||||
filename = item['filename']
|
||||
ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
|
||||
|
||||
# Get display filename
|
||||
display_name = filename.split('\\')[-1] if '\\' in filename else filename.split('/')[-1] if '/' in filename else filename
|
||||
|
||||
# Extract path hierarchy
|
||||
path_parts = filename.replace('\\', '/').split('/')
|
||||
artist = path_parts[-3] if len(path_parts) >= 3 else ''
|
||||
album = path_parts[-2] if len(path_parts) >= 3 else path_parts[-2] if len(path_parts) == 2 else ''
|
||||
|
||||
# Extract track number and title
|
||||
base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name
|
||||
track_num = ''
|
||||
title = base_name
|
||||
filename_artist = ''
|
||||
|
||||
match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name)
|
||||
if match:
|
||||
track_num = match.group(1)
|
||||
rest = match.group(2)
|
||||
if ' - ' in rest:
|
||||
filename_artist, title = rest.split(' - ', 1)
|
||||
else:
|
||||
title = rest
|
||||
|
||||
if filename_artist:
|
||||
artist = filename_artist
|
||||
|
||||
enriched_results.append({
|
||||
**item,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'title': title,
|
||||
'track_num': track_num,
|
||||
'ext': ext
|
||||
})
|
||||
|
||||
# Apply filters
|
||||
if filters:
|
||||
artist_filter = filters.get('artist', '').lower() if filters.get('artist') else ''
|
||||
album_filter = filters.get('album', '').lower() if filters.get('album') else ''
|
||||
track_filter = filters.get('track', '').lower() if filters.get('track') else ''
|
||||
|
||||
if artist_filter or album_filter or track_filter:
|
||||
filtered = []
|
||||
for item in enriched_results:
|
||||
if artist_filter and artist_filter not in item['artist'].lower():
|
||||
continue
|
||||
if album_filter and album_filter not in item['album'].lower():
|
||||
continue
|
||||
if track_filter and track_filter not in item['title'].lower():
|
||||
continue
|
||||
filtered.append(item)
|
||||
enriched_results = filtered
|
||||
|
||||
# Sort: .flac first, then by size
|
||||
enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size']))
|
||||
|
||||
# Convert to SearchResult
|
||||
results = []
|
||||
for idx, item in enumerate(enriched_results, 1):
|
||||
artist_display = item['artist'] if item['artist'] else "(no artist)"
|
||||
album_display = item['album'] if item['album'] else "(no album)"
|
||||
size_mb = int(item['size'] / 1024 / 1024)
|
||||
|
||||
columns = [
|
||||
("Track", item['track_num'] or "?"),
|
||||
("Title", item['title'][:40]),
|
||||
("Artist", artist_display[:32]),
|
||||
("Album", album_display[:32]),
|
||||
("Size", f"{size_mb} MB"),
|
||||
]
|
||||
|
||||
results.append(SearchResult(
|
||||
origin="soulseek",
|
||||
title=item['title'],
|
||||
path=item['filename'],
|
||||
detail=f"{artist_display} - {album_display}",
|
||||
annotations=[f"{size_mb} MB", item['ext'].lstrip('.').upper()],
|
||||
media_kind="audio",
|
||||
size_bytes=item['size'],
|
||||
columns=columns,
|
||||
full_metadata={
|
||||
"username": item['username'],
|
||||
"filename": item['filename'],
|
||||
"artist": item['artist'],
|
||||
"album": item['album'],
|
||||
"track_num": item['track_num'],
|
||||
"ext": item['ext'],
|
||||
},
|
||||
))
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
log(f"[soulseek] Search error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
from aioslsk.client import SoulSeekClient
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
class Bandcamp(SearchProvider):
    """Search provider for Bandcamp, scraping its search page with Playwright."""

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> List[SearchResult]:
        """Search Bandcamp and return up to ``limit`` results.

        A query starting with ``artist:`` searches with ``item_type=b``;
        anything else searches with ``item_type=a``.

        Returns:
            Parsed results, or an empty list when Playwright is unavailable
            or the search fails.
        """
        if not PLAYWRIGHT_AVAILABLE:
            log("[bandcamp] Playwright not available. Install with: pip install playwright", file=sys.stderr)
            return []

        from urllib.parse import quote_plus

        # Strip before slicing so leading whitespace cannot shift the prefix
        # cut, and URL-encode the term so spaces, '&' and '#' survive.
        cleaned = query.strip()
        if cleaned.lower().startswith("artist:"):
            term = cleaned[len("artist:"):].strip().strip('"')
            item_type = "b"
        else:
            term = cleaned
            item_type = "a"
        search_url = f"https://bandcamp.com/search?q={quote_plus(term)}&item_type={item_type}"

        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    return self._scrape_url(page, search_url, limit)
                finally:
                    # Close the browser even when scraping raises.
                    browser.close()
        except Exception as e:
            log(f"[bandcamp] Search error: {e}", file=sys.stderr)
            return []

    def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]:
        """Load ``url`` in ``page`` and parse its ``.searchresult`` elements.

        Items without a heading link are skipped; an item whose parsing
        raises is logged via ``debug`` and skipped.
        """
        debug(f"[bandcamp] Scraping: {url}")

        page.goto(url)
        page.wait_for_load_state("domcontentloaded")

        results = []
        for item in page.query_selector_all(".searchresult")[:limit]:
            try:
                heading = item.query_selector(".heading")
                if not heading:
                    continue

                link = heading.query_selector("a")
                if not link:
                    continue

                title = link.inner_text().strip()
                target_url = link.get_attribute("href")

                subhead = item.query_selector(".subhead")
                artist = subhead.inner_text().strip() if subhead else "Unknown"

                itemtype = item.query_selector(".itemtype")
                media_type = itemtype.inner_text().strip() if itemtype else "album"

                results.append(SearchResult(
                    origin="bandcamp",
                    title=title,
                    path=target_url,
                    detail=f"By: {artist}",
                    annotations=[media_type],
                    media_kind="audio",
                    columns=[
                        ("Name", title),
                        ("Artist", artist),
                        ("Type", media_type),
                    ],
                    full_metadata={
                        "artist": artist,
                        "type": media_type,
                    },
                ))
            except Exception as e:
                debug(f"[bandcamp] Error parsing result: {e}")
                continue

        return results

    def validate(self) -> bool:
        """Return True when Playwright was importable at module load."""
        return PLAYWRIGHT_AVAILABLE
|
||||
|
||||
|
||||
class YouTube(SearchProvider):
    """Search provider for YouTube using the ``yt-dlp`` command-line tool."""

    def search(
        self,
        query: str,
        limit: int = 10,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> List[SearchResult]:
        """Run ``yt-dlp`` with a ``ytsearch`` query and parse its JSON lines.

        Args:
            query: Search text.
            limit: Number of results requested from yt-dlp.

        Returns:
            One ``SearchResult`` per parsable JSON object; an empty list when
            yt-dlp is missing, exits non-zero, or an error occurs.
        """
        ytdlp_path = shutil.which("yt-dlp")
        if not ytdlp_path:
            log("[youtube] yt-dlp not found in PATH", file=sys.stderr)
            return []

        search_query = f"ytsearch{limit}:{query}"

        cmd = [
            ytdlp_path,
            "--dump-json",
            "--flat-playlist",
            "--no-warnings",
            search_query,
        ]

        try:
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
            )

            if process.returncode != 0:
                log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr)
                return []

            results = []
            for line in process.stdout.splitlines():
                if not line.strip():
                    continue
                try:
                    video_data = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A stray non-object JSON line must not abort the whole search.
                if not isinstance(video_data, dict):
                    continue

                # Use `or` rather than a .get() default: a key that is present
                # with a JSON null would otherwise come through as None.
                title = video_data.get("title") or "Unknown"
                video_id = video_data.get("id") or ""
                url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}"
                uploader = video_data.get("uploader") or "Unknown"
                duration = video_data.get("duration") or 0
                view_count = video_data.get("view_count") or 0

                duration_str = f"{int(duration//60)}:{int(duration%60):02d}" if duration else ""
                views_str = f"{view_count:,}" if view_count else ""

                # Only annotate with values that are actually known.
                annotations = []
                if duration_str:
                    annotations.append(duration_str)
                if views_str:
                    annotations.append(f"{views_str} views")

                results.append(SearchResult(
                    origin="youtube",
                    title=title,
                    path=url,
                    detail=f"By: {uploader}",
                    annotations=annotations,
                    media_kind="video",
                    columns=[
                        ("Title", title),
                        ("Uploader", uploader),
                        ("Duration", duration_str),
                        ("Views", views_str),
                    ],
                    full_metadata={
                        "video_id": video_id,
                        "uploader": uploader,
                        "duration": duration,
                        "view_count": view_count,
                    },
                ))

            return results

        except Exception as e:
            log(f"[youtube] Error: {e}", file=sys.stderr)
            return []

    def validate(self) -> bool:
        """Return True when ``yt-dlp`` is found on PATH."""
        return shutil.which("yt-dlp") is not None

    def pipe(self, path: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]:
        """Return the playable URL for MPV (just the path for YouTube)."""
        return path
|
||||
|
||||
|
||||
# Search provider registry: lower-case provider name -> provider class.
_SEARCH_PROVIDERS: Dict[str, type] = {
    "libgen": Libgen,
    "soulseek": Soulseek,
    "bandcamp": Bandcamp,
    "youtube": YouTube,
}
|
||||
|
||||
|
||||
def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
    """Look up, instantiate and validate a search provider.

    Args:
        name: Provider name; matched case-insensitively against the registry.
        config: Optional configuration passed to the provider constructor.

    Returns:
        The provider instance, or None when the name is unknown, the
        provider reports itself unavailable, or construction/validation
        raises (each case is logged to stderr).
    """
    cls = _SEARCH_PROVIDERS.get(name.lower())
    if cls is None:
        log(f"[provider] Unknown search provider: {name}", file=sys.stderr)
        return None

    try:
        instance = cls(config)
        if instance.validate():
            return instance
        log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
    except Exception as e:
        log(f"[provider] Error initializing '{name}': {e}", file=sys.stderr)
    return None
|
||||
|
||||
|
||||
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Map every registered search provider name to its availability.

    A provider whose construction or ``validate()`` call raises is reported
    as unavailable.
    """
    status: Dict[str, bool] = {}
    for key, cls in _SEARCH_PROVIDERS.items():
        try:
            usable = cls(config).validate()
        except Exception:
            usable = False
        status[key] = usable
    return status
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FILE PROVIDERS
|
||||
# ============================================================================
|
||||
|
||||
class FileProvider(ABC):
    """Abstract base for providers that upload a local file."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        # A missing or empty config falls back to a fresh empty dict.
        self.config = config if config else {}
        # The provider name is the lower-cased concrete class name.
        self.name = type(self).__name__.lower()

    @abstractmethod
    def upload(self, file_path: str, **kwargs: Any) -> str:
        """Upload the file at ``file_path`` and return the resulting URL."""
        pass

    def validate(self) -> bool:
        """Tell whether the provider is usable; the base class always says yes."""
        return True
|
||||
|
||||
|
||||
class ZeroXZero(FileProvider):
    """File provider that uploads to 0x0.st."""

    def upload(self, file_path: str, **kwargs: Any) -> str:
        """POST the file to https://0x0.st and return the response text.

        Raises:
            FileNotFoundError: ``file_path`` does not exist.
            Exception: The server answered with a status other than 200, or
                the request itself failed (logged to stderr, then re-raised).
        """
        from helper.http_client import HTTPClient

        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            with HTTPClient(headers={"User-Agent": "Medeia-Macina/1.0"}) as client:
                with open(file_path, 'rb') as handle:
                    response = client.post("https://0x0.st", files={"file": handle})

                if response.status_code != 200:
                    raise Exception(f"Upload failed: {response.status_code} - {response.text}")
                return response.text.strip()
        except Exception as e:
            log(f"[0x0] Upload error: {e}", file=sys.stderr)
            raise

    def validate(self) -> bool:
        """Always available: no configuration is needed."""
        return True
|
||||
|
||||
|
||||
class Matrix(FileProvider):
    """File provider that uploads a file to a Matrix room.

    Reads ``homeserver``, ``room_id`` and ``access_token`` from
    ``config['storage']['matrix']``.
    """

    _AUDIO_EXTS = frozenset({'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'})
    _VIDEO_EXTS = frozenset({'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'})
    _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'})

    def validate(self) -> bool:
        """Return True when homeserver, room_id and a credential are configured.

        NOTE(review): a ``password`` alone satisfies this check, but
        ``upload`` only supports ``access_token`` and raises without one.
        """
        if not self.config:
            return False
        matrix_conf = self.config.get('storage', {}).get('matrix', {})
        return bool(
            matrix_conf.get('homeserver') and
            matrix_conf.get('room_id') and
            (matrix_conf.get('access_token') or matrix_conf.get('password'))
        )

    def upload(self, file_path: str, **kwargs: Any) -> str:
        """Upload a file to the media repository and post it to the room.

        Args:
            file_path: Path of the local file to send.

        Returns:
            A ``https://matrix.to`` link to the created message event.

        Raises:
            FileNotFoundError: ``file_path`` does not exist.
            Exception: Required configuration is missing, or either HTTP
                request returned a status other than 200.
        """
        from pathlib import Path

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        matrix_conf = self.config.get('storage', {}).get('matrix', {})
        homeserver = matrix_conf.get('homeserver')
        access_token = matrix_conf.get('access_token')
        room_id = matrix_conf.get('room_id')

        # Fail with a clear message instead of an AttributeError on None or
        # a request authenticated as "Bearer None".
        if not homeserver or not room_id:
            raise Exception("Matrix homeserver and room_id must be configured")
        if not access_token:
            raise Exception("Matrix access_token is required for upload")

        if not homeserver.startswith('http'):
            homeserver = f"https://{homeserver}"
        homeserver = homeserver.rstrip('/')

        auth_headers = {"Authorization": f"Bearer {access_token}"}

        # Upload media: the body is the raw file, typed by its guessed MIME type.
        upload_url = f"{homeserver}/_matrix/media/v3/upload"
        mime_type, _ = mimetypes.guess_type(str(path))
        upload_headers = {
            **auth_headers,
            "Content-Type": mime_type or "application/octet-stream",
        }

        filename = path.name

        with open(path, 'rb') as f:
            resp = requests.post(upload_url, headers=upload_headers, data=f, params={"filename": filename})

        if resp.status_code != 200:
            raise Exception(f"Matrix upload failed: {resp.text}")

        content_uri = resp.json().get('content_uri')
        if not content_uri:
            raise Exception("No content_uri returned")

        # Send message
        send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"

        # Determine message type from the file extension.
        ext = path.suffix.lower()
        if ext in self._AUDIO_EXTS:
            msgtype = "m.audio"
        elif ext in self._VIDEO_EXTS:
            msgtype = "m.video"
        elif ext in self._IMAGE_EXTS:
            msgtype = "m.image"
        else:
            msgtype = "m.file"

        info = {"size": path.stat().st_size}
        if mime_type:
            # Only include a mimetype that is actually known.
            info["mimetype"] = mime_type

        payload = {
            "msgtype": msgtype,
            "body": filename,
            "url": content_uri,
            "info": info,
        }

        # Send with auth-only headers: reusing the upload headers would label
        # this JSON body with the file's Content-Type.
        resp = requests.post(send_url, headers=auth_headers, json=payload)
        if resp.status_code != 200:
            raise Exception(f"Matrix send message failed: {resp.text}")

        event_id = resp.json().get('event_id')
        return f"https://matrix.to/#/{room_id}/{event_id}"
|
||||
|
||||
|
||||
# File provider registry: lower-case provider name -> provider class.
_FILE_PROVIDERS: Dict[str, type] = {
    "0x0": ZeroXZero,
    "matrix": Matrix,
}
|
||||
|
||||
|
||||
def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
    """Look up, instantiate and validate a file provider.

    Args:
        name: Provider name; matched case-insensitively against the registry.
        config: Optional configuration passed to the provider constructor.

    Returns:
        The provider instance, or None when the name is unknown, the
        provider reports itself unavailable, or construction/validation
        raises (each case is logged to stderr).
    """
    cls = _FILE_PROVIDERS.get(name.lower())
    if cls is None:
        log(f"[provider] Unknown file provider: {name}", file=sys.stderr)
        return None

    try:
        instance = cls(config)
        if instance.validate():
            return instance
        log(f"[provider] File provider '{name}' is not available", file=sys.stderr)
    except Exception as e:
        log(f"[provider] Error initializing file provider '{name}': {e}", file=sys.stderr)
    return None
|
||||
|
||||
|
||||
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Map every registered file provider name to its availability.

    A provider whose construction or ``validate()`` call raises is reported
    as unavailable.
    """
    status: Dict[str, bool] = {}
    for key, cls in _FILE_PROVIDERS.items():
        try:
            usable = cls(config).validate()
        except Exception:
            usable = False
        status[key] = usable
    return status
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -159,8 +159,8 @@ def create_app():
|
||||
status["storage_path"] = str(STORAGE_PATH)
|
||||
status["storage_exists"] = STORAGE_PATH.exists()
|
||||
try:
|
||||
from helper.local_library import LocalLibraryDB
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
from helper.folder_store import FolderDB
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
status["database_accessible"] = True
|
||||
except Exception as e:
|
||||
status["database_accessible"] = False
|
||||
@@ -177,7 +177,7 @@ def create_app():
|
||||
@require_storage()
|
||||
def search_files():
|
||||
"""Search for files by name or tag."""
|
||||
from helper.local_library import LocalLibrarySearchOptimizer
|
||||
from helper.folder_store import LocalLibrarySearchOptimizer
|
||||
|
||||
query = request.args.get('q', '')
|
||||
limit = request.args.get('limit', 100, type=int)
|
||||
@@ -205,11 +205,11 @@ def create_app():
|
||||
@require_storage()
|
||||
def get_file_metadata(file_hash: str):
|
||||
"""Get metadata for a specific file by hash."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
|
||||
if not file_path or not file_path.exists():
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
@@ -233,13 +233,13 @@ def create_app():
|
||||
@require_storage()
|
||||
def index_file():
|
||||
"""Index a new file in the storage."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
from helper.utils import sha256_file
|
||||
|
||||
data = request.get_json() or {}
|
||||
file_path_str = data.get('path')
|
||||
tags = data.get('tags', [])
|
||||
urls = data.get('urls', [])
|
||||
url = data.get('url', [])
|
||||
|
||||
if not file_path_str:
|
||||
return jsonify({"error": "File path required"}), 400
|
||||
@@ -250,14 +250,14 @@ def create_app():
|
||||
if not file_path.exists():
|
||||
return jsonify({"error": "File does not exist"}), 404
|
||||
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
db.get_or_create_file_entry(file_path)
|
||||
|
||||
if tags:
|
||||
db.add_tags(file_path, tags)
|
||||
|
||||
if urls:
|
||||
db.add_known_urls(file_path, urls)
|
||||
if url:
|
||||
db.add_url(file_path, url)
|
||||
|
||||
file_hash = sha256_file(file_path)
|
||||
|
||||
@@ -265,7 +265,7 @@ def create_app():
|
||||
"hash": file_hash,
|
||||
"path": str(file_path),
|
||||
"tags_added": len(tags),
|
||||
"urls_added": len(urls)
|
||||
"url_added": len(url)
|
||||
}), 201
|
||||
except Exception as e:
|
||||
logger.error(f"Index error: {e}", exc_info=True)
|
||||
@@ -280,11 +280,11 @@ def create_app():
|
||||
@require_storage()
|
||||
def get_tags(file_hash: str):
|
||||
"""Get tags for a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
@@ -299,7 +299,7 @@ def create_app():
|
||||
@require_storage()
|
||||
def add_tags(file_hash: str):
|
||||
"""Add tags to a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
tags = data.get('tags', [])
|
||||
@@ -309,8 +309,8 @@ def create_app():
|
||||
return jsonify({"error": "Tags required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
@@ -328,13 +328,13 @@ def create_app():
|
||||
@require_storage()
|
||||
def remove_tags(file_hash: str):
|
||||
"""Remove tags from a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
tags_str = request.args.get('tags', '')
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
@@ -358,11 +358,11 @@ def create_app():
|
||||
@require_storage()
|
||||
def get_relationships(file_hash: str):
|
||||
"""Get relationships for a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
@@ -378,7 +378,7 @@ def create_app():
|
||||
@require_storage()
|
||||
def set_relationship():
|
||||
"""Set a relationship between two files."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
from_hash = data.get('from_hash')
|
||||
@@ -389,9 +389,9 @@ def create_app():
|
||||
return jsonify({"error": "from_hash and to_hash required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
from_path = db.search_by_hash(from_hash)
|
||||
to_path = db.search_by_hash(to_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
from_path = db.search_hash(from_hash)
|
||||
to_path = db.search_hash(to_hash)
|
||||
|
||||
if not from_path or not to_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
@@ -406,49 +406,49 @@ def create_app():
|
||||
# URL OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/urls/<file_hash>', methods=['GET'])
|
||||
@app.route('/url/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_urls(file_hash: str):
|
||||
"""Get known URLs for a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
def get_url(file_hash: str):
|
||||
"""Get known url for a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
metadata = db.get_metadata(file_path)
|
||||
urls = metadata.get('known_urls', []) if metadata else []
|
||||
return jsonify({"hash": file_hash, "urls": urls}), 200
|
||||
url = metadata.get('url', []) if metadata else []
|
||||
return jsonify({"hash": file_hash, "url": url}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get URLs error: {e}", exc_info=True)
|
||||
logger.error(f"Get url error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/urls/<file_hash>', methods=['POST'])
|
||||
@app.route('/url/<file_hash>', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def add_urls(file_hash: str):
|
||||
"""Add URLs to a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
def add_url(file_hash: str):
|
||||
"""Add url to a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
urls = data.get('urls', [])
|
||||
url = data.get('url', [])
|
||||
|
||||
if not urls:
|
||||
return jsonify({"error": "URLs required"}), 400
|
||||
if not url:
|
||||
return jsonify({"error": "url required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
db.add_known_urls(file_path, urls)
|
||||
return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200
|
||||
db.add_url(file_path, url)
|
||||
return jsonify({"hash": file_hash, "url_added": len(url)}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Add URLs error: {e}", exc_info=True)
|
||||
logger.error(f"Add url error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
return app
|
||||
@@ -509,8 +509,8 @@ def main():
|
||||
print(f"\n{'='*70}\n")
|
||||
|
||||
try:
|
||||
from helper.local_library import LocalLibraryDB
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
from helper.folder_store import FolderDB
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
logger.info("Database initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize database: {e}")
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
2268
helper/store.py
Normal file
2268
helper/store.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -555,7 +555,7 @@ class UnifiedBookDownloader:
|
||||
This follows the exact process from archive_client.py:
|
||||
1. Login with credentials
|
||||
2. Call loan() to create 14-day borrow
|
||||
3. Get book info (extract page URLs)
|
||||
3. Get book info (extract page url)
|
||||
4. Download all pages as images
|
||||
5. Merge images into searchable PDF
|
||||
|
||||
@@ -576,10 +576,10 @@ class UnifiedBookDownloader:
|
||||
# If we get here, borrowing succeeded
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
|
||||
|
||||
# Now get the book info (page URLs and metadata)
|
||||
# Now get the book info (page url and metadata)
|
||||
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
|
||||
# Try both URL formats: with /borrow and without
|
||||
book_urls = [
|
||||
book_url = [
|
||||
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
|
||||
f"https://archive.org/details/{book_id}" # Fallback to details page
|
||||
]
|
||||
@@ -589,7 +589,7 @@ class UnifiedBookDownloader:
|
||||
metadata = None
|
||||
last_error = None
|
||||
|
||||
for book_url in book_urls:
|
||||
for book_url in book_url:
|
||||
try:
|
||||
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
|
||||
response = session.get(book_url, timeout=10)
|
||||
@@ -611,7 +611,7 @@ class UnifiedBookDownloader:
|
||||
continue
|
||||
|
||||
if links is None:
|
||||
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
|
||||
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
|
||||
# Borrow extraction failed - return False
|
||||
return False, "Could not extract borrowed book pages"
|
||||
|
||||
|
||||
@@ -308,7 +308,7 @@ def format_metadata_value(key: str, value) -> str:
|
||||
# ============================================================================
|
||||
# Link Utilities - Consolidated from link_utils.py
|
||||
# ============================================================================
|
||||
"""Link utilities - Extract and process URLs from various sources."""
|
||||
"""Link utilities - Extract and process url from various sources."""
|
||||
|
||||
|
||||
def extract_link_from_args(args: Iterable[str]) -> Any | None:
|
||||
|
||||
@@ -77,3 +77,26 @@ mime_maps = {
|
||||
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def get_type_from_ext(ext: str) -> str:
    """Look up the type name whose ``mime_maps`` entry contains an extension.

    Args:
        ext: File extension, with or without a leading dot ('jpg' or '.jpg').

    Returns:
        The first ``mime_maps`` key whose mapping contains the normalised
        extension, or 'other' when ``ext`` is empty or nothing matches.
    """
    if not ext:
        return 'other'

    # Compare without the leading dot and case-insensitively.
    wanted = ext.lstrip('.').lower()
    return next(
        (kind for kind, known_exts in mime_maps.items() if wanted in known_exts),
        'other',
    )
|
||||
|
||||
@@ -11,7 +11,7 @@ from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
|
||||
from .local_library import LocalLibraryDB
|
||||
from .folder_store import FolderDB
|
||||
from helper.logger import log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -140,7 +140,7 @@ class Worker:
|
||||
class WorkerLoggingHandler(logging.StreamHandler):
|
||||
"""Custom logging handler that captures logs for a worker."""
|
||||
|
||||
def __init__(self, worker_id: str, db: LocalLibraryDB,
|
||||
def __init__(self, worker_id: str, db: FolderDB,
|
||||
manager: Optional['WorkerManager'] = None,
|
||||
buffer_size: int = 50):
|
||||
"""Initialize the handler.
|
||||
@@ -235,7 +235,7 @@ class WorkerManager:
|
||||
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
|
||||
"""
|
||||
self.library_root = Path(library_root)
|
||||
self.db = LocalLibraryDB(library_root)
|
||||
self.db = FolderDB(library_root)
|
||||
self.auto_refresh_interval = auto_refresh_interval
|
||||
self.refresh_callbacks: List[Callable] = []
|
||||
self.refresh_thread: Optional[Thread] = None
|
||||
@@ -244,6 +244,22 @@ class WorkerManager:
|
||||
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
|
||||
self._worker_last_step: Dict[str, str] = {}
|
||||
|
||||
def close(self) -> None:
    """Close the database handle, ignoring any error raised while closing."""
    if not self.db:
        return
    try:
        self.db.close()
    except Exception:
        # Best-effort: a failing close is deliberately swallowed.
        pass
|
||||
|
||||
def __enter__(self):
    """Enter the ``with`` block, yielding this manager itself."""
    return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the ``with`` block by calling ``close()``; exceptions are not suppressed."""
    self.close()
|
||||
|
||||
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
|
||||
"""Register a callback to be called on worker updates.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user