AST
42 .gitattributes vendored Normal file
@@ -0,0 +1,42 @@
# Auto detect text files and normalize line endings to LF
* text=auto

# Python files
*.py text eol=lf
*.pyx text eol=lf
*.pyi text eol=lf

# Shell scripts
*.sh text eol=lf
*.bash text eol=lf

# Windows batch files
*.bat text eol=crlf
*.cmd text eol=crlf
*.ps1 text eol=crlf

# Config files
*.json text eol=lf
*.toml text eol=lf
*.yaml text eol=lf
*.yml text eol=lf
*.ini text eol=lf
*.cfg text eol=lf

# Documentation
*.md text eol=lf
README text eol=lf
LICENSE text eol=lf

# Binary files
*.db binary
*.sqlite binary
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.webp binary
*.mov binary
*.mp4 binary
*.webm binary
*.pdf binary
219 .gitignore copy Normal file
@@ -0,0 +1,219 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
config.json
# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py
config.json
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# ---> Lua
# Compiled Lua sources
luac.out

# luarocks build files
*.src.rock
*.zip
*.tar.gz

# Object files
*.o
*.os
*.ko
*.obj
*.elf

# Precompiled Headers
*.gch
*.pch

# Libraries
*.lib
*.a
*.la
*.lo
*.def
*.exp

# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib

# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
64 README copy.md Normal file
@@ -0,0 +1,64 @@
# Medeia-Macina

A powerful CLI media management and search platform integrating local files, Hydrus, torrents, books, and P2P networks.

## Key Features
* **Unified Search**: Search across Local, Hydrus, LibGen, Soulseek, and Debrid.
* **Pipeline Architecture**: Chain commands like PowerShell (e.g., `search | filter | download`).
* **Smart Selection**: Use `@N` syntax to interact with results.
* **Metadata Management**: Tagging, notes, and relationships.

## Installation
1. Install Python 3.9+ and [Deno](https://deno.com/) (for YouTube support).
2. Install dependencies: `pip install -r requirements.txt`
3. Run the CLI: `python CLI.py`

## Command Examples

### Search & Download
```powershell
# Search and download the first result
search-file "daughter" | @1 | download-data

# Search a specific provider and download
search-file -provider libgen "dune" | @1 | download-data

# Download a YouTube video (auto-probes formats)
download-data "https://youtube.com/watch?v=..."
# Select format #2 from the list
@2 | download-data
```

### File Management
```powershell
# Add a file to Hydrus
add-file -path "C:\Videos\movie.mp4" -storage hydrus

# Upload to 0x0.st and associate the URL with the Hydrus file
search-file "my_video" | @1 | add-file -provider 0x0

# Add tags to a file
search-file "video" | @1 | add-tag "creator:someone, character:hero"

# Use tag lists (from helper/adjective.json)
@1 | add-tag "{gnostic}"
```

### Metadata & Notes
```powershell
# Add a note
search-file "doc" | @1 | add-note "comment" "This is important"

# Get tags
search-file "image" | @1 | get-tag
```

### Pipeline Syntax
* `|` : Pipe results from one command to another.
* `@N` : Select the Nth item from the previous result (e.g., `@1`).
* `@N-M` : Select a range (e.g., `@1-5`).
* `@{1,3,5}` : Select specific items.
* `@*` : Select all items.
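
For readers who want to script against this syntax, the sketch below shows one way the `@` selection tokens can be expanded into zero-based result indices. It is an illustrative example only, not the parser the CLI actually uses; the function name `expand_selection` is hypothetical.

```python
# Hypothetical helper, shown only to illustrate the @-selection semantics above.
def expand_selection(token: str, result_count: int) -> list[int]:
    body = token.lstrip("@")
    if body == "*":                                    # @*       -> every item
        return list(range(result_count))
    if body.startswith("{") and body.endswith("}"):    # @{1,3,5} -> specific items
        return [int(n) - 1 for n in body[1:-1].split(",")]
    if "-" in body:                                    # @1-5     -> a range
        start, end = body.split("-", 1)
        return list(range(int(start) - 1, int(end)))
    return [int(body) - 1]                             # @N       -> single item

print(expand_selection("@1-3", 10))  # [0, 1, 2]
```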

## Configuration
Edit `config.json` to set API keys (AllDebrid, OpenAI), storage paths, and Hydrus credentials.
1 TUI/__init__.py Normal file
@@ -0,0 +1 @@
"""Medeia-Macina TUI - Terminal User Interface."""
105 TUI/menu_actions.py Normal file
@@ -0,0 +1,105 @@
"""Utilities that drive the modern Textual UI menus and presets."""

from __future__ import annotations

import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence

BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (ROOT_DIR, BASE_DIR):
    str_path = str(path)
    if str_path not in sys.path:
        sys.path.insert(0, str_path)

import metadata


@dataclass(slots=True)
class PipelinePreset:
    """Simple descriptor for a reusable pipeline."""

    label: str
    description: str
    pipeline: str


PIPELINE_PRESETS: List[PipelinePreset] = [
    PipelinePreset(
        label="Download → Merge → Local",
        description="Use download-data with playlist auto-selection, merge the pieces, tag, then import into local storage.",
        pipeline='download-data "<url>" | merge-file | add-tag | add-file -storage local',
    ),
    PipelinePreset(
        label="Download → Hydrus",
        description="Fetch media, auto-tag, and push directly into Hydrus.",
        pipeline='download-data "<url>" | merge-file | add-tag | add-file -storage hydrus',
    ),
    PipelinePreset(
        label="Search Local Library",
        description="Run search-file against the local library and emit a result table for further piping.",
        pipeline='search-file -library local -query "<keywords>"',
    ),
]


def load_tags(file_path: Path) -> List[str]:
    """Read tags for a file using metadata.py as the single source of truth."""

    try:
        return metadata.read_tags_from_file(file_path)
    except Exception:
        return []


def group_tags_by_namespace(tags: Sequence[str]) -> Dict[str, List[str]]:
    """Return tags grouped by namespace for quick UI summaries."""

    grouped: Dict[str, List[str]] = {}
    for tag in metadata.normalize_tags(list(tags)):
        namespace, value = metadata.split_tag(tag)
        key = namespace or "_untagged"
        grouped.setdefault(key, []).append(value)

    for items in grouped.values():
        items.sort()
    return grouped


def build_metadata_snapshot(file_path: Path) -> Dict[str, Any]:
    """Load any available sidecar metadata for the selected file."""

    snapshot: Dict[str, Any] = {
        "file": str(file_path),
        "tags": group_tags_by_namespace(load_tags(file_path)),
    }

    try:
        sidecar = metadata._derive_sidecar_path(file_path)
        if sidecar.is_file():
            title, tags, notes = metadata._read_sidecar_metadata(sidecar)
            snapshot["sidecar"] = {
                "title": title,
                "tags": group_tags_by_namespace(tags),
                "notes": notes,
            }
    except Exception:
        snapshot["sidecar"] = None

    return snapshot


def summarize_result(result: Dict[str, Any]) -> str:
    """Build a one-line summary for a pipeline result row."""

    title = result.get("title") or result.get("identifier") or result.get("file_path")
    source = result.get("source") or result.get("cmdlet") or "result"
    return f"{source}: {title}" if title else source


def normalize_tags(tags: Iterable[str]) -> List[str]:
    """Expose metadata.normalize_tags for callers that imported the old helper."""

    return metadata.normalize_tags(list(tags))
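How these helpers are meant to be consumed, as a minimal usage sketch: it assumes the script runs from the repository root so that the project-level `metadata` module imported by `menu_actions` resolves, and the sample `result` dict is invented.

# Minimal usage sketch for TUI/menu_actions.py (sample data invented).
from TUI.menu_actions import PIPELINE_PRESETS, summarize_result

# List the bundled pipeline presets the menu offers.
for preset in PIPELINE_PRESETS:
    print(f"{preset.label}: {preset.pipeline}")

# Build the one-line summary the UI shows for a pipeline result row.
result = {"source": "search-file", "title": "dune.epub"}
print(summarize_result(result))  # -> "search-file: dune.epub"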
7 TUI/modalscreen/__init__.py Normal file
@@ -0,0 +1,7 @@
"""Modal screens for the Downlow Hub UI application."""

from .export import ExportModal
from .search import SearchModal
from .workers import WorkersModal

__all__ = ["ExportModal", "SearchModal", "WorkersModal"]
139 TUI/modalscreen/access.py Normal file
@@ -0,0 +1,139 @@
"""Modal for displaying files/URLs to access in web mode."""

from textual.screen import ModalScreen
from textual.containers import Container, Vertical, Horizontal
from textual.widgets import Static, Button, Label
from textual.app import ComposeResult
import logging

logger = logging.getLogger(__name__)


class AccessModal(ModalScreen):
    """Modal to display a file/URL that can be accessed from a phone browser."""

    CSS = """
    Screen {
        align: center middle;
    }

    #access-container {
        width: 80;
        height: auto;
        border: thick $primary;
        background: $surface;
    }

    #access-header {
        dock: top;
        height: 3;
        background: $boost;
        border-bottom: solid $accent;
        content-align: center middle;
    }

    #access-content {
        height: auto;
        width: 1fr;
        padding: 1 2;
        border-bottom: solid $accent;
    }

    #access-footer {
        dock: bottom;
        height: 3;
        background: $boost;
        border-top: solid $accent;
        align: center middle;
    }

    .access-url {
        width: 1fr;
        height: auto;
        margin-bottom: 1;
        border: solid $accent;
        padding: 1;
    }

    .access-label {
        width: 1fr;
        height: auto;
        margin-bottom: 1;
    }

    Button {
        margin-right: 1;
    }
    """

    def __init__(self, title: str, content: str, is_url: bool = False):
        """Initialize access modal.

        Args:
            title: Title of the item being accessed
            content: The URL or file path
            is_url: Whether this is a URL (True) or file path (False)
        """
        super().__init__()
        self.item_title = title
        self.item_content = content
        self.is_url = is_url

    def compose(self) -> ComposeResult:
        """Create the modal layout."""
        with Container(id="access-container"):
            with Vertical(id="access-header"):
                yield Label(f"[bold]{self.item_title}[/bold]")
                yield Label("[dim]Click link below to open in your browser[/dim]")

            with Vertical(id="access-content"):
                if self.is_url:
                    yield Label("[bold cyan]Link:[/bold cyan]", classes="access-label")
                else:
                    yield Label("[bold cyan]File:[/bold cyan]", classes="access-label")

                # Display as a clickable link for web mode.
                # Rich link markup `[link=URL]` has parsing issues with URLs containing special chars,
                # so use the link markup that textual-serve renders as an <a> tag.
                # Format: [link=URL "tooltip"]text[/link] - the quotes help with parsing
                link_text = f'[link="{self.item_content}"]Open in Browser[/link]'
                content_box = Static(link_text, classes="access-url")
                yield content_box

                # Also show the URL for reference/copying
                yield Label(self.item_content, classes="access-label")

                yield Label("\n[yellow]↑ Click the link above to open on your device[/yellow]", classes="access-label")

            with Horizontal(id="access-footer"):
                yield Button("Copy URL", id="copy-btn", variant="primary")
                yield Button("Close", id="close-btn", variant="default")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle button presses."""
        if event.button.id == "copy-btn":
            # Copy to clipboard (optional - not critical if it fails)
            logger.info(f"Attempting to copy: {self.item_content}")
            try:
                # Try to use pyperclip if available
                try:
                    import pyperclip
                    pyperclip.copy(self.item_content)
                    logger.info("URL copied to clipboard via pyperclip")
                except ImportError:
                    # Fallback: try xclip on Linux or pbcopy on Mac
                    import subprocess
                    import sys
                    if sys.platform == "win32":
                        # Windows: clipboard requires pyperclip (already tried)
                        logger.debug("Windows clipboard not available without pyperclip")
                    else:
                        # Linux/Mac
                        process = subprocess.Popen(['xclip', '-selection', 'clipboard'], stdin=subprocess.PIPE)
                        process.communicate(self.item_content.encode('utf-8'))
                        logger.info("URL copied to clipboard via xclip")
            except Exception as e:
                logger.debug(f"Clipboard copy not available: {e}")
                # Not critical - just informational
        elif event.button.id == "close-btn":
            self.dismiss()
1880 TUI/modalscreen/download.py Normal file
File diff suppressed because it is too large
183 TUI/modalscreen/download.tcss Normal file
@@ -0,0 +1,183 @@
/* Download Modal Screen Stylesheet */

Screen {
    background: $surface;
    overlay: screen;
}

#download_modal {
    width: 100%;
    height: 100%;
    border: heavy $primary;
    background: $boost;
}

#download_title {
    dock: top;
    height: 1;
    content-align: center middle;
    background: $primary;
    color: $text;
    text-style: bold;
    padding: 0 1;
}

/* Main horizontal layout: 2 columns left/right split */
#main_layout {
    width: 1fr;
    height: 1fr;
    layout: horizontal;
    padding: 1;
    border: none;
}

/* Left column */
#left_column {
    width: 2fr;
    height: 1fr;
    layout: vertical;
}

/* Right column */
#right_column {
    width: 1fr;
    height: 1fr;
    layout: vertical;
}

/* All containers styling */
.grid_container {
    width: 1fr;
    height: 1fr;
    padding: 1;
    layout: vertical;
    margin: 0 0 1 0;
}

#tags_container {
    border: mediumpurple;
}

#url_container {
    border: solid $accent;
}

#files_container {
    border: solid $accent;
}

#playlist_container {
    border: solid $accent;
    layout: vertical;
    height: 0;
}

#playlist_tree {
    width: 1fr;
    height: auto;
    border: none;
    padding: 0;
}

#playlist_input {
    width: 1fr;
    height: 1;
    border: none;
    padding: 0 1;
    margin: 1 0 0 0;
}

#playlist_input_row {
    width: 1fr;
    height: auto;
    layout: horizontal;
    margin: 1 0 0 0;
}

.section_title {
    width: 1fr;
    height: 1;
    text-align: left;
    color: $text-muted;
    text-style: bold;
    margin: 0 0 0 0;
    padding: 0;
}

/* TextArea widgets in containers */
#tags_textarea {
    width: 1fr;
    height: 1fr;
    border: none;
    padding: 0;
}

#paragraph_textarea {
    width: 1fr;
    height: 1fr;
    border: none;
    padding: 0;
}

/* Select widgets in containers */
#files_select {
    width: 1fr;
    height: 1fr;
    border: none;
}

/* Footer layout - horizontal: checkboxes left, source middle, buttons right */
#footer_layout {
    width: 1fr;
    height: auto;
    layout: horizontal;
    padding: 1;
    margin: 0;
    background: $boost;
}

#checkbox_row {
    width: auto;
    height: auto;
    layout: horizontal;
    align: left middle;
}

#source_select {
    width: 30;
    height: 1;
    border: none;
    padding: 0 1;
    margin: 0;
}

#button_row {
    width: auto;
    height: auto;
    layout: horizontal;
    align: right middle;
}

/* Progress bar - shown during download */
#progress_bar {
    width: 1fr;
    height: 0;
}

/* Checkbox and Button styling */
Checkbox {
    margin: 0 2 0 0;
}

Button {
    margin: 0 1 0 0;
    width: 12;
}

#cancel_btn {
    width: 12;
}

#submit_btn {
    width: 12;
}
512 TUI/modalscreen/export.py Normal file
@@ -0,0 +1,512 @@
"""Export modal screen for exporting files with metadata."""

from textual.app import ComposeResult
from textual.screen import ModalScreen
from textual.containers import Container, Horizontal, Vertical
from textual.widgets import Static, Button, Input, TextArea, Tree, Select
from textual.binding import Binding
import logging
from typing import Optional, Any
from pathlib import Path
import json
import sys
import subprocess
from datetime import datetime

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from helper.utils import format_metadata_value
from config import load_config

logger = logging.getLogger(__name__)


class ExportModal(ModalScreen):
    """Modal screen for exporting files with metadata and tags."""

    BINDINGS = [
        Binding("escape", "cancel", "Cancel"),
    ]

    CSS_PATH = "export.tcss"

    def __init__(self, result_data: Optional[dict] = None, hydrus_available: bool = False, debrid_available: bool = False):
        """Initialize the export modal with result data.

        Args:
            result_data: Dictionary containing:
                - title: str - Item title
                - tags: str - Comma-separated tags
                - metadata: dict - File metadata (source-specific from item.metadata or local DB)
                - source: str - Source identifier ('local', 'hydrus', 'debrid', etc.)
                - current_result: object - The full search result object
            hydrus_available: bool - Whether the Hydrus API is available
            debrid_available: bool - Whether the Debrid API is available
        """
        super().__init__()
        self.result_data = result_data or {}
        self.hydrus_available = hydrus_available
        self.debrid_available = debrid_available
        self.metadata_display: Optional[Static] = None
        self.tags_textarea: Optional[TextArea] = None
        self.export_to_select: Optional[Select] = None
        self.custom_path_input: Optional[Input] = None
        self.libraries_select: Optional[Select] = None
        self.size_input: Optional[Input] = None
        self.format_select: Optional[Select] = None
        self.file_ext: Optional[str] = None  # Store the file extension for format filtering
        self.file_type: Optional[str] = None  # Store the file type (audio, video, image, document)
        self.default_format: Optional[str] = None  # Store the default format to set after mount

    def _determine_file_type(self, ext: str) -> tuple[str, list]:
        """Determine file type from extension and return type and format options.

        Args:
            ext: File extension (e.g., '.mp3', '.mp4', '.jpg')

        Returns:
            Tuple of (file_type, format_options) where format_options is a list of (label, value) tuples
        """
        ext_lower = ext.lower() if ext else ''

        # Audio formats
        audio_exts = {'.mp3', '.flac', '.wav', '.aac', '.ogg', '.m4a', '.wma', '.opus', '.mka'}
        audio_formats = [("MKA", "mka"), ("MP3", "mp3"), ("M4A", "m4a"), ("FLAC", "flac"), ("WAV", "wav"), ("AAC", "aac"), ("OGG", "ogg"), ("Opus", "opus")]

        # Video formats (can have audio too)
        video_exts = {'.mp4', '.mkv', '.webm', '.avi', '.mov', '.flv', '.wmv', '.m4v', '.ts', '.mpg', '.mpeg'}
        video_formats = [("MP4", "mp4"), ("MKV", "mkv"), ("WebM", "webm"), ("AVI", "avi"), ("MOV", "mov")]

        # Image formats
        image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico'}
        image_formats = [("JPG", "jpg"), ("PNG", "png"), ("WebP", "webp"), ("GIF", "gif"), ("BMP", "bmp")]

        # Document formats - no conversion for now
        document_exts = {'.pdf', '.epub', '.txt', '.docx', '.doc', '.rtf', '.md', '.html', '.mobi', '.cbz', '.cbr'}
        document_formats = []

        if ext_lower in audio_exts:
            return ('audio', audio_formats)
        elif ext_lower in video_exts:
            return ('video', video_formats)
        elif ext_lower in image_exts:
            return ('image', image_formats)
        elif ext_lower in document_exts:
            return ('document', document_formats)
        else:
            # Default to audio if unknown
            return ('unknown', audio_formats)

    def _get_library_options(self) -> list:
        """Get available library options from config.json."""
        options = [("Local", "local")]

        try:
            # Try to load config
            config_path = Path(__file__).parent.parent / "config.json"
            if not config_path.exists():
                return options

            with open(config_path, 'r') as f:
                config = json.load(f)

            # Check if Hydrus is configured AND available (supports both new and old format)
            from config import get_hydrus_instance
            hydrus_instance = get_hydrus_instance(config, "home")
            if self.hydrus_available and hydrus_instance and hydrus_instance.get("key") and hydrus_instance.get("url"):
                options.append(("Hydrus Network", "hydrus"))

            # Check if Debrid is configured AND available (supports both new and old format)
            from config import get_debrid_api_key
            debrid_api_key = get_debrid_api_key(config)
            if self.debrid_available and debrid_api_key:
                options.append(("Debrid", "debrid"))

        except Exception as e:
            logger.error(f"Error loading config for libraries: {e}")

        return options

    def _get_metadata_text(self) -> str:
        """Format metadata from result data in a consistent display format."""
        metadata = self.result_data.get('metadata', {})
        source = self.result_data.get('source', 'unknown')
        logger.info(f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}")

        if not metadata:
            logger.info("_get_metadata_text - No metadata found, returning 'No metadata available'")
            return "No metadata available"

        lines = []

        # Only display these specific fields in this order
        display_fields = [
            'duration', 'size', 'ext', 'media_type', 'time_imported', 'time_modified', 'hash'
        ]

        # Display fields in a consistent order
        for field in display_fields:
            if field in metadata:
                value = metadata[field]
                # Skip complex types and None values
                if isinstance(value, (dict, list)) or value is None:
                    continue
                # Use central formatting rule
                formatted_value = format_metadata_value(field, value)
                # Format: "Field Name: value"
                field_label = field.replace('_', ' ').title()
                lines.append(f"{field_label}: {formatted_value}")

        # If we found any fields, display them
        if lines:
            logger.info(f"_get_metadata_text - Returning {len(lines)} formatted metadata lines")
            return "\n".join(lines)
        else:
            logger.info("_get_metadata_text - No matching fields found in metadata")
            return "No metadata available"

    def compose(self) -> ComposeResult:
        """Compose the export modal screen."""
        with Container(id="export-container"):
            yield Static("Export File with Metadata", id="export-title")

            # Row 1: Three columns (Tags, Metadata, Export-To Options)
            self.tags_textarea = TextArea(
                text=self._format_tags(),
                id="tags-area",
                read_only=False,
            )
            yield self.tags_textarea
            self.tags_textarea.border_title = "Tags"

            # Metadata display instead of files tree
            self.metadata_display = Static(
                self._get_metadata_text(),
                id="metadata-display",
            )
            yield self.metadata_display
            self.metadata_display.border = ("solid", "dodgerblue")

            # Right column: Export options
            with Vertical(id="export-options"):
                # Export To selector
                self.export_to_select = Select(
                    [("0x0", "0x0"), ("Libraries", "libraries"), ("Custom Path", "path")],
                    id="export-to-select"
                )
                yield self.export_to_select

                # Libraries selector (initially hidden)
                library_options = self._get_library_options()
                self.libraries_select = Select(
                    library_options,
                    id="libraries-select"
                )
                yield self.libraries_select

                # Custom path input (initially hidden)
                self.custom_path_input = Input(
                    placeholder="Enter custom export path",
                    id="custom-path-input"
                )
                yield self.custom_path_input

                # Get metadata for size and format options
                metadata = self.result_data.get('metadata', {})
                original_size = metadata.get('size', '')
                ext = metadata.get('ext', '')

                # Store the extension and determine file type
                self.file_ext = ext
                self.file_type, format_options = self._determine_file_type(ext)

                # Format size in MB for display
                if original_size:
                    size_mb = int(original_size / (1024 * 1024)) if isinstance(original_size, (int, float)) else original_size
                    size_display = f"{size_mb}Mb"
                else:
                    size_display = ""

                # Size input
                self.size_input = Input(
                    value=size_display,
                    placeholder="Size (can reduce)",
                    id="size-input",
                    disabled=(self.file_type == 'document')  # Disable for documents - no resizing needed
                )
                yield self.size_input

                # Determine the default format value (match current extension to format options)
                default_format = None
                if ext and format_options:
                    # Map extension to format value (e.g., .flac -> "flac", .mp3 -> "mp3", .m4a -> "m4a")
                    ext_lower = ext.lower().lstrip('.')  # Remove leading dot if present
                    # Try to find a matching format option
                    for _, value in format_options:
                        if value and (ext_lower == value or f".{ext_lower}" == ext or ext.endswith(f".{value}")):
                            default_format = value
                            logger.debug(f"Matched extension {ext} to format {value}")
                            break
                    # If no exact match, use the first option
                    if not default_format and format_options:
                        default_format = format_options[0][1]
                        logger.debug(f"No format match for {ext}, using first option: {default_format}")

                # Store the default format to apply after mount
                self.default_format = default_format

                # Format selector based on file type
                self.format_select = Select(
                    format_options if format_options else [("No conversion", "")],
                    id="format-select",
                    disabled=not format_options  # Disable if no format options (e.g., documents)
                )
                yield self.format_select

            # Row 2: Buttons
            with Horizontal(id="export-buttons"):
                yield Button("Cancel", id="cancel-btn", variant="default")
                yield Button("Export", id="export-btn", variant="primary")

    def _format_tags(self) -> str:
        """Format tags from result data."""
        tags = self.result_data.get('tags', '')
        if isinstance(tags, str):
            # Split by comma and rejoin with newlines
            tags_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
            return '\n'.join(tags_list)
        elif isinstance(tags, list):
            return '\n'.join(tags)
        return ''

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle button press events."""
        button_id = event.button.id

        if button_id == "export-btn":
            self._handle_export()
        elif button_id == "cancel-btn":
            self.action_cancel()

    def on_select_changed(self, event: Select.Changed) -> None:
        """Handle select widget changes."""
        if event.control.id == "export-to-select":
            # Show/hide custom path and libraries based on selection
            if self.custom_path_input:
                self.custom_path_input.display = (event.value == "path")
            if self.libraries_select:
                self.libraries_select.display = (event.value == "libraries")
        elif event.control.id == "libraries-select":
            # Handle library selection (no special action needed currently)
            logger.debug(f"Library selected: {event.value}")

    def on_mount(self) -> None:
        """Handle mount event."""
        # Initially hide custom path and libraries inputs (default is "0x0")
        if self.custom_path_input:
            self.custom_path_input.display = False
        if self.libraries_select:
            self.libraries_select.display = False

        # Set the default format value so it shows selected instead of "Select"
        if self.default_format and self.format_select:
            self.format_select.value = self.default_format
            logger.debug(f"Set format selector to default value: {self.default_format}")

        # Refresh metadata display after mount to ensure data is loaded
        if self.metadata_display:
            metadata_text = self._get_metadata_text()
            self.metadata_display.update(metadata_text)
            logger.debug(f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}")

    def _handle_export(self) -> None:
        """Handle the export action."""
        try:
            tags_text = self.tags_textarea.text.strip()
            export_to = self.export_to_select.value if self.export_to_select else "0x0"
            custom_path = self.custom_path_input.value.strip() if self.custom_path_input else ""

            # Get library value - handle Select.BLANK case
            library = "local"  # default
            if self.libraries_select and str(self.libraries_select.value) != "Select.BLANK":
                library = str(self.libraries_select.value)
            elif self.libraries_select and self.libraries_select:
                # If value is Select.BLANK, try to get from the options
                try:
                    # Get first available library option as fallback
                    options = self._get_library_options()
                    if options:
                        library = options[0][1]  # Get the value part of first option tuple
                except Exception:
                    library = "local"

            size = self.size_input.value.strip() if self.size_input else ""
            file_format = self.format_select.value if self.format_select else "mp4"

            # Parse tags from textarea (one per line)
            export_tags = set()
            for line in tags_text.split('\n'):
                tag = line.strip()
                if tag:
                    export_tags.add(tag)

            # For Hydrus export, filter out metadata-only tags (hash:, known_url:, relationship:)
            if export_to == "libraries" and library == "hydrus":
                metadata_prefixes = {'hash:', 'known_url:', 'relationship:'}
                export_tags = {tag for tag in export_tags if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)}
                logger.info(f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining")

            # Extract title and add as searchable tags if not already present
            title = self.result_data.get('title', '').strip()
            if title:
                # Add the full title as a tag if not already present
                title_tag = f"title:{title}"
                if title_tag not in export_tags and not any(t.startswith('title:') for t in export_tags):
                    export_tags.add(title_tag)

                # Extract individual words from the title as searchable tags (if reasonable length)
                # Skip very short words and common stop words
                if len(title) < 100:  # Only for reasonably short titles
                    stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'in', 'to', 'for', 'is', 'it', 'at', 'by', 'from', 'with', 'as', 'be', 'on', 'that', 'this'}
                    words = title.lower().split()
                    for word in words:
                        # Clean up word (remove punctuation)
                        clean_word = ''.join(c for c in word if c.isalnum())
                        # Only add if not a stop word and has some length
                        if clean_word and len(clean_word) > 2 and clean_word not in stop_words:
                            if clean_word not in export_tags:
                                export_tags.add(clean_word)
                    logger.info(f"Extracted {len(words)} words from title, added searchable title tags")

            # Validate required fields - allow export to continue for Hydrus even with 0 actual tags
            # (metadata tags will still be in the sidecar, and tags can be added later)
            if not export_tags and export_to != "libraries":
                logger.warning("No tags provided for export")
                return

            if export_to == "libraries" and not export_tags:
                logger.warning("No actual tags for Hydrus export (only metadata was present)")
                # Don't return - allow export to continue, file will be added to Hydrus even without tags

            # Determine export path
            export_path = None
            if export_to == "path":
                if not custom_path:
                    logger.warning("Custom path required but not provided")
                    return
                export_path = custom_path
            elif export_to == "libraries":
                export_path = library  # "local", "hydrus", "debrid"
            else:
                export_path = export_to  # "0x0"

            # Get metadata from result_data
            metadata = self.result_data.get('metadata', {})

            # Extract file source info from result_data (passed by hub-ui)
            file_hash = self.result_data.get('file_hash')
            file_url = self.result_data.get('file_url')
            file_path = self.result_data.get('file_path')  # For local files
            source = self.result_data.get('source', 'unknown')

            # Prepare export data
            export_data = {
                'export_to': export_to,
                'export_path': export_path,
                'library': library if export_to == "libraries" else None,
                'tags': export_tags,
                'size': size if size else None,
                'format': file_format,
                'metadata': metadata,
                'original_data': self.result_data,
                'file_hash': file_hash,
                'file_url': file_url,
                'file_path': file_path,  # Pass file path for local files
                'source': source,
            }

            logger.info(f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}")

            # Dismiss the modal and return the export data
            self.dismiss(export_data)

        except Exception as e:
            logger.error(f"Error during export: {e}", exc_info=True)

    def action_cancel(self) -> None:
        """Handle cancel action."""
        self.dismiss(None)


def create_notes_sidecar(file_path: Path, notes: str) -> None:
    """Create a .notes sidecar file with notes text.

    Only creates the file if notes are not empty.

    Args:
        file_path: Path to the exported file
        notes: Notes text
    """
    if not notes or not notes.strip():
        return

    notes_path = file_path.with_suffix(file_path.suffix + '.notes')
    try:
        with open(notes_path, 'w', encoding='utf-8') as f:
            f.write(notes.strip())
        logger.info(f"Created notes sidecar: {notes_path}")
    except Exception as e:
        logger.error(f"Failed to create notes sidecar: {e}", exc_info=True)


def determine_needs_conversion(current_ext: str, target_format: str) -> bool:
    """Determine if conversion is needed between two formats.

    Args:
        current_ext: Current file extension (e.g., '.flac')
        target_format: Target format name (e.g., 'mp3') or NoSelection object

    Returns:
        True if conversion is needed, False if it's already the target format
    """
    # Handle NoSelection or None
    if not target_format or target_format == "" or str(target_format.__class__.__name__) == 'NoSelection':
        return False  # No conversion requested

    # Normalize the current extension
    current_ext_lower = current_ext.lower().lstrip('.')
    target_format_lower = str(target_format).lower()

    # Check if they match
    return current_ext_lower != target_format_lower


def calculate_size_tolerance(metadata: dict, user_size_mb: Optional[str]) -> tuple[Optional[int], Optional[int]]:
    """Calculate target size with a 1MB grace period.

    Args:
        metadata: File metadata containing 'size' in bytes
        user_size_mb: User-entered size like "756Mb" or an empty string

    Returns:
        Tuple of (target_bytes, grace_bytes) where grace_bytes is 1MB (1048576),
        or (None, grace_bytes) if no size was specified
    """
    grace_bytes = 1 * 1024 * 1024  # 1MB grace period

    if not user_size_mb or not user_size_mb.strip():
        return None, grace_bytes

    try:
        # Parse the size string (format like "756Mb")
        size_str = user_size_mb.strip().lower()
        if size_str.endswith('mb'):
            size_str = size_str[:-2]
        elif size_str.endswith('m'):
            size_str = size_str[:-1]

        size_mb = float(size_str)
        target_bytes = int(size_mb * 1024 * 1024)
        return target_bytes, grace_bytes
    except (ValueError, AttributeError):
        return None, grace_bytes
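The two module-level helpers above are easiest to read from a concrete call. A minimal usage sketch follows; the sample values are invented, and importing this module also pulls in textual plus the project's config and helper modules, so it assumes the project's dependencies are installed:

# Minimal usage sketch for the export.py size/format helpers (sample values invented).
from TUI.modalscreen.export import calculate_size_tolerance, determine_needs_conversion

target, grace = calculate_size_tolerance({"size": 900 * 1024 * 1024}, "756Mb")
print(target, grace)  # 792723456 1048576 -> aim for ~756 MB with 1 MB of slack

print(determine_needs_conversion(".flac", "mp3"))  # True  -> re-encode needed
print(determine_needs_conversion(".mp3", "mp3"))   # False -> already in the target format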
85 TUI/modalscreen/export.tcss Normal file
@@ -0,0 +1,85 @@
/* Export Modal Screen Styling */

ExportModal {
    align: center middle;
}

#export-container {
    width: 140;
    height: 55;
    background: $panel;
    border: solid $primary;
    layout: grid;
    grid-columns: 1fr 1fr 1fr;
    grid-rows: auto 1fr auto;
}

#export-title {
    height: 1;
    text-align: center;
    text-style: bold;
    color: $accent;
    background: $boost;
    padding: 1 2;
    column-span: 3;
}

/* Row 1: Three columns */
#tags-area {
    height: 1fr;
    column-span: 1;
    border: solid mediumvioletred;
}

#metadata-display {
    height: 1fr;
    column-span: 1;
    border: solid dodgerblue;
    overflow: auto;
    padding: 1;
}

#export-options {
    height: 1fr;
    column-span: 1;
    border: solid mediumpurple;
    layout: vertical;
    padding: 1;
}

#export-options Select,
#export-options Input {
    height: 3;
    margin: 0 0 1 0;
}

#custom-path-input {
    height: 3;
    margin: 0 0 1 0;
}

#libraries-select {
    height: 3;
    margin: 0 0 1 0;
}

#size-input {
    height: 3;
    margin: 0 0 1 0;
}

#format-select {
    height: 3;
}

/* Row 2: Buttons */
#export-buttons {
    height: auto;
    column-span: 3;
    layout: horizontal;
}

#export-buttons Button {
    width: 1fr;
    margin: 0 1;
}
505 TUI/modalscreen/search.py Normal file
@@ -0,0 +1,505 @@
|
|||||||
|
"""Search modal screen for OpenLibrary and Soulseek."""
|
||||||
|
|
||||||
|
from textual.app import ComposeResult
|
||||||
|
from textual.screen import ModalScreen
|
||||||
|
from textual.containers import Container, Horizontal, Vertical
|
||||||
|
from textual.widgets import Static, Button, Input, Select, DataTable, TextArea
|
||||||
|
from textual.binding import Binding
|
||||||
|
from textual.message import Message
|
||||||
|
import logging
|
||||||
|
from typing import Optional, Any, List
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# Add parent directory to path for imports
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import load_config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SearchModal(ModalScreen):
|
||||||
|
"""Modal screen for searching OpenLibrary and Soulseek."""
|
||||||
|
|
||||||
|
BINDINGS = [
|
||||||
|
Binding("escape", "cancel", "Cancel"),
|
||||||
|
Binding("enter", "search_focused", "Search"),
|
||||||
|
Binding("ctrl+t", "scrape_tags", "Scrape Tags"),
|
||||||
|
]
|
||||||
|
|
||||||
|
CSS_PATH = "search.tcss"
|
||||||
|
|
||||||
|
class SearchSelected(Message):
|
||||||
|
"""Posted when user selects a search result."""
|
||||||
|
def __init__(self, result: dict) -> None:
|
||||||
|
self.result = result
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def __init__(self, app_instance=None):
|
||||||
|
"""Initialize the search modal.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app_instance: Reference to the main App instance for worker creation
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.app_instance = app_instance
|
||||||
|
self.source_select: Optional[Select] = None
|
||||||
|
self.search_input: Optional[Input] = None
|
||||||
|
self.results_table: Optional[DataTable] = None
|
||||||
|
self.tags_textarea: Optional[TextArea] = None
|
||||||
|
self.library_source_select: Optional[Select] = None
|
||||||
|
self.current_results: List[dict] = []
|
||||||
|
self.is_searching = False
|
||||||
|
self.current_worker = None # Track worker for search operations
|
||||||
|
|
||||||
|
def compose(self) -> ComposeResult:
|
||||||
|
"""Create child widgets for the search modal."""
|
||||||
|
with Vertical(id="search-container"):
|
||||||
|
yield Static("Search Books & Music", id="search-title")
|
||||||
|
|
||||||
|
with Horizontal(id="search-controls"):
|
||||||
|
# Source selector
|
||||||
|
self.source_select = Select(
|
||||||
|
[("OpenLibrary", "openlibrary"), ("Soulseek", "soulseek")],
|
||||||
|
value="openlibrary",
|
||||||
|
id="source-select"
|
||||||
|
)
|
||||||
|
yield self.source_select
|
||||||
|
|
||||||
|
# Search input
|
||||||
|
self.search_input = Input(
|
||||||
|
placeholder="Enter search query...",
|
||||||
|
id="search-input"
|
||||||
|
)
|
||||||
|
yield self.search_input
|
||||||
|
|
||||||
|
# Search button
|
||||||
|
yield Button("Search", id="search-button", variant="primary")
|
||||||
|
|
||||||
|
# Results table
|
||||||
|
self.results_table = DataTable(id="results-table")
|
||||||
|
yield self.results_table
|
||||||
|
|
||||||
|
# Two-column layout: tags on left, source/submit on right
|
||||||
|
with Horizontal(id="bottom-controls"):
|
||||||
|
# Left column: Tags textarea
|
||||||
|
with Vertical(id="tags-column"):
|
||||||
|
self.tags_textarea = TextArea(
|
||||||
|
text="",
|
||||||
|
id="result-tags-textarea",
|
||||||
|
read_only=False
|
||||||
|
)
|
||||||
|
self.tags_textarea.border_title = "Tags [Ctrl+T: Scrape]"
|
||||||
|
yield self.tags_textarea
|
||||||
|
|
||||||
|
# Right column: Library source and submit button
|
||||||
|
with Vertical(id="source-submit-column"):
|
||||||
|
# Library source selector (for OpenLibrary results)
|
||||||
|
self.library_source_select = Select(
|
||||||
|
[("Local", "local"), ("Download", "download")],
|
||||||
|
value="local",
|
||||||
|
id="library-source-select"
|
||||||
|
)
|
||||||
|
yield self.library_source_select
|
||||||
|
|
||||||
|
# Submit button
|
||||||
|
yield Button("Submit", id="submit-button", variant="primary")
|
||||||
|
|
||||||
|
# Buttons at bottom
|
||||||
|
with Horizontal(id="search-buttons"):
|
||||||
|
yield Button("Select", id="select-button", variant="primary")
|
||||||
|
yield Button("Download", id="download-button", variant="primary")
|
||||||
|
yield Button("Cancel", id="cancel-button", variant="default")
|
||||||
|
|
||||||
|
def on_mount(self) -> None:
|
||||||
|
"""Set up the table columns and focus."""
|
||||||
|
# Set up results table columns
|
||||||
|
self.results_table.add_columns(
|
||||||
|
"Title",
|
||||||
|
"Author/Artist",
|
||||||
|
"Year/Album",
|
||||||
|
"Details"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Focus on search input
|
||||||
|
self.search_input.focus()
|
||||||
|
|
||||||
|
async def _search_openlibrary(self, query: str) -> List[dict]:
|
||||||
|
"""Search OpenLibrary for books."""
|
||||||
|
try:
|
||||||
|
from helper.search_provider import get_provider
|
||||||
|
|
||||||
|
logger.info(f"[search-modal] Searching OpenLibrary for: {query}")
|
||||||
|
|
||||||
|
# Get the OpenLibrary provider (now has smart search built-in)
|
||||||
|
provider = get_provider("openlibrary")
|
||||||
|
if not provider:
|
||||||
|
logger.error("[search-modal] OpenLibrary provider not available")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Search using the provider (smart search is now default)
|
||||||
|
search_results = provider.search(query, limit=20)
|
||||||
|
|
||||||
|
formatted_results = []
|
||||||
|
for result in search_results:
|
||||||
|
# Extract metadata from SearchResult.full_metadata
|
||||||
|
metadata = result.full_metadata or {}
|
||||||
|
|
||||||
|
formatted_results.append({
|
||||||
|
"title": result.title,
|
||||||
|
"author": ", ".join(metadata.get("authors", [])) if metadata.get("authors") else "Unknown",
|
||||||
|
"year": metadata.get("year", ""),
|
||||||
|
"publisher": metadata.get("publisher", ""),
|
||||||
|
"isbn": metadata.get("isbn", ""),
|
||||||
|
"oclc": metadata.get("oclc", ""),
|
||||||
|
"lccn": metadata.get("lccn", ""),
|
||||||
|
"openlibrary_id": metadata.get("olid", ""),
|
||||||
|
"pages": metadata.get("pages", ""),
|
||||||
|
"language": metadata.get("language", ""),
|
||||||
|
"source": "openlibrary",
|
||||||
|
"columns": result.columns,
|
||||||
|
"raw_data": metadata
|
||||||
|
})
|
||||||
|
|
||||||
|
logger.info(f"[search-modal] Found {len(formatted_results)} OpenLibrary results")
|
||||||
|
return formatted_results
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[search-modal] OpenLibrary search error: {e}", exc_info=True)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def _search_soulseek(self, query: str) -> List[dict]:
|
||||||
|
"""Search Soulseek for music with automatic worker tracking."""
|
||||||
|
try:
|
||||||
|
from helper.search_provider import get_provider
|
||||||
|
|
||||||
|
# Create worker for tracking
|
||||||
|
worker = None
|
||||||
|
if self.app_instance and hasattr(self.app_instance, 'create_worker'):
|
||||||
|
worker = self.app_instance.create_worker(
|
||||||
|
'soulseek',
|
||||||
|
title=f"Soulseek Search: {query[:40]}",
|
||||||
|
description=f"Searching P2P network for music"
|
||||||
|
)
|
||||||
|
self.current_worker = worker
|
||||||
|
|
||||||
|
if worker:
|
||||||
|
worker.log_step("Connecting to Soulseek peer network...")
|
||||||
|
|
||||||
|
logger.info(f"[search-modal] Searching Soulseek for: {query}")
|
||||||
|
provider = get_provider("soulseek")
|
||||||
|
search_results = provider.search(query, limit=20)
|
||||||
|
|
||||||
|
if worker:
|
||||||
|
worker.log_step(f"Search returned {len(search_results)} results")
|
||||||
|
|
||||||
|
logger.info(f"[search-modal] Found {len(search_results)} Soulseek results")
|
||||||
|
|
||||||
|
# Format results for display
|
||||||
|
formatted_results = []
|
||||||
|
for idx, result in enumerate(search_results):
|
||||||
|
metadata = result.full_metadata or {}
|
||||||
|
artist = metadata.get('artist', '')
|
||||||
|
album = metadata.get('album', '')
|
||||||
|
title = result.title
|
||||||
|
track_num = metadata.get('track_num', '')
|
||||||
|
size_bytes = result.size_bytes or 0
|
||||||
|
|
||||||
|
# Format size as human-readable
|
||||||
|
if size_bytes > 1024 * 1024:
|
||||||
|
size_str = f"{size_bytes / (1024 * 1024):.1f} MB"
|
||||||
|
elif size_bytes > 1024:
|
||||||
|
size_str = f"{size_bytes / 1024:.1f} KB"
|
||||||
|
else:
|
||||||
|
size_str = f"{size_bytes} B"
|
||||||
|
|
||||||
|
# Build columns for display
|
||||||
|
columns = [
|
||||||
|
("#", str(idx + 1)),
|
||||||
|
("Title", title[:50] if title else "Unknown"),
|
||||||
|
("Artist", artist[:30] if artist else "(no artist)"),
|
||||||
|
("Album", album[:30] if album else ""),
|
||||||
|
]
|
||||||
|
|
||||||
|
formatted_results.append({
|
||||||
|
"title": title if title else "Unknown",
|
||||||
|
"artist": artist if artist else "(no artist)",
|
||||||
|
"album": album,
|
||||||
|
"track": track_num,
|
||||||
|
"filesize": size_str,
|
||||||
|
"bitrate": "", # Not available in Soulseek results
|
||||||
|
"source": "soulseek",
|
||||||
|
"columns": columns,
|
||||||
|
"raw_data": result.to_dict()
|
||||||
|
})
|
||||||
|
|
||||||
|
return formatted_results
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[search-modal] Soulseek search error: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def _perform_search(self) -> None:
|
||||||
|
"""Perform the actual search based on selected source."""
|
||||||
|
if not self.search_input or not self.source_select or not self.results_table:
|
||||||
|
logger.error("[search-modal] Widgets not initialized")
|
||||||
|
return
|
||||||
|
|
||||||
|
query = self.search_input.value.strip()
|
||||||
|
if not query:
|
||||||
|
logger.warning("[search-modal] Empty search query")
|
||||||
|
return
|
||||||
|
|
||||||
|
source = self.source_select.value
|
||||||
|
|
||||||
|
# Clear existing results
|
||||||
|
self.results_table.clear()
|
||||||
|
self.current_results = []
|
||||||
|
|
||||||
|
self.is_searching = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
if source == "openlibrary":
|
||||||
|
results = await self._search_openlibrary(query)
|
||||||
|
elif source == "soulseek":
|
||||||
|
results = await self._search_soulseek(query)
|
||||||
|
else:
|
||||||
|
logger.warning(f"[search-modal] Unknown source: {source}")
|
||||||
|
if self.current_worker:
|
||||||
|
self.current_worker.finish("error", "Unknown search source")
|
||||||
|
return
|
||||||
|
|
||||||
|
self.current_results = results
|
||||||
|
|
||||||
|
# Populate table with results
|
||||||
|
if results:
|
||||||
|
# Check if first result has columns field
|
||||||
|
first_result = results[0]
|
||||||
|
if "columns" in first_result and first_result["columns"]:
|
||||||
|
# Use dynamic columns from result
|
||||||
|
# Clear any existing rows (the column set created in on_mount is not rebuilt here)
|
||||||
|
self.results_table.clear()
|
||||||
|
|
||||||
|
# Extract column headers from first result's columns field
|
||||||
|
column_headers = [col[0] for col in first_result["columns"]]
|
||||||
|
|
||||||
|
# Ideally the existing columns would be removed and re-added with the right headers
|
||||||
|
# Note: This is a workaround since Textual's DataTable doesn't support dynamic column management well
|
||||||
|
# For now, we just use the dynamic column headers from the result
|
||||||
|
logger.info(f"[search-modal] Using dynamic columns: {column_headers}")
|
||||||
|
|
||||||
|
# Populate rows using the column order from results
|
||||||
|
for result in results:
|
||||||
|
if "columns" in result and result["columns"]:
|
||||||
|
# Extract values in column order
|
||||||
|
row_data = [col[1] for col in result["columns"]]
|
||||||
|
self.results_table.add_row(*row_data)
|
||||||
|
else:
|
||||||
|
# Fallback for results without columns
|
||||||
|
logger.warning(f"[search-modal] Result missing columns field: {result.get('title', 'Unknown')}")
|
||||||
|
else:
|
||||||
|
# Fallback to original hardcoded behavior if columns not available
|
||||||
|
logger.info("[search-modal] No dynamic columns found, using default formatting")
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
if source == "openlibrary":
|
||||||
|
# Format OpenLibrary results (original hardcoded)
|
||||||
|
year = str(result.get("year", ""))[:4] if result.get("year") else ""
|
||||||
|
details = f"ISBN: {result.get('isbn', '')}" if result.get('isbn') else ""
|
||||||
|
if result.get('openlibrary_id'):
|
||||||
|
details += f" | OL: {result.get('openlibrary_id')}"
|
||||||
|
|
||||||
|
row_data = [
|
||||||
|
result["title"][:60],
|
||||||
|
result["author"][:35],
|
||||||
|
year,
|
||||||
|
details[:40]
|
||||||
|
]
|
||||||
|
else: # soulseek
|
||||||
|
row_data = [
|
||||||
|
result["title"][:50],
|
||||||
|
result["artist"][:30],
|
||||||
|
result["album"][:30],
|
||||||
|
result['filesize']
|
||||||
|
]
|
||||||
|
|
||||||
|
self.results_table.add_row(*row_data)
|
||||||
|
else:
|
||||||
|
# Add a "no results" message
|
||||||
|
self.results_table.add_row("No results found", "", "", "")
|
||||||
|
|
||||||
|
# Finish worker if tracking
|
||||||
|
if self.current_worker:
|
||||||
|
self.current_worker.finish("completed", f"Found {len(results)} results")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[search-modal] Search error: {e}")
|
||||||
|
if self.current_worker:
|
||||||
|
self.current_worker.finish("error", f"Search failed: {str(e)}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
self.is_searching = False
|
||||||
|
|
||||||
|
def on_button_pressed(self, event: Button.Pressed) -> None:
|
||||||
|
"""Handle button presses."""
|
||||||
|
button_id = event.button.id
|
||||||
|
|
||||||
|
if button_id == "search-button":
|
||||||
|
# Run search asynchronously
|
||||||
|
asyncio.create_task(self._perform_search())
|
||||||
|
|
||||||
|
elif button_id == "select-button":
|
||||||
|
# Get selected row and populate tags textarea
|
||||||
|
if self.results_table and self.results_table.row_count > 0:
|
||||||
|
selected_row = self.results_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.current_results):
|
||||||
|
result = self.current_results[selected_row]
|
||||||
|
# Populate tags textarea with result metadata
|
||||||
|
self._populate_tags_from_result(result)
|
||||||
|
else:
|
||||||
|
logger.warning("[search-modal] No results to select")
|
||||||
|
|
||||||
|
elif button_id == "download-button":
|
||||||
|
# Download the selected result
|
||||||
|
if self.current_results and self.results_table.row_count > 0:
|
||||||
|
selected_row = self.results_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.current_results):
|
||||||
|
result = self.current_results[selected_row]
|
||||||
|
if result.get("source") == "openlibrary":
|
||||||
|
asyncio.create_task(self._download_book(result))
|
||||||
|
else:
|
||||||
|
logger.warning("[search-modal] Download only supported for OpenLibrary results")
|
||||||
|
else:
|
||||||
|
logger.warning("[search-modal] No result selected for download")
|
||||||
|
|
||||||
|
elif button_id == "submit-button":
|
||||||
|
# Submit the current result with tags and source
|
||||||
|
if self.current_results and self.results_table.row_count > 0:
|
||||||
|
selected_row = self.results_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.current_results):
|
||||||
|
result = self.current_results[selected_row]
|
||||||
|
# Get tags from textarea
|
||||||
|
tags_text = self.tags_textarea.text if self.tags_textarea else ""
|
||||||
|
# Get library source (if OpenLibrary)
|
||||||
|
library_source = self.library_source_select.value if self.library_source_select else "local"
|
||||||
|
|
||||||
|
# Add tags and source to result
|
||||||
|
result["tags_text"] = tags_text
|
||||||
|
result["library_source"] = library_source
|
||||||
|
|
||||||
|
# Post message and dismiss
|
||||||
|
self.post_message(self.SearchSelected(result))
|
||||||
|
self.dismiss(result)
|
||||||
|
else:
|
||||||
|
logger.warning("[search-modal] No result selected for submission")
|
||||||
|
|
||||||
|
elif button_id == "cancel-button":
|
||||||
|
self.dismiss(None)
|
||||||
|
|
||||||
|
def _populate_tags_from_result(self, result: dict) -> None:
|
||||||
|
"""Populate the tags textarea from a selected result."""
|
||||||
|
if not self.tags_textarea:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Format tags based on result source
|
||||||
|
if result.get("source") == "openlibrary":
|
||||||
|
# For OpenLibrary: title, author, year
|
||||||
|
title = result.get("title", "")
|
||||||
|
author = result.get("author", "")
|
||||||
|
year = result.get("year", "")
|
||||||
|
tags = []
|
||||||
|
if title:
|
||||||
|
tags.append(title)
|
||||||
|
if author:
|
||||||
|
tags.append(author)
|
||||||
|
if year:
|
||||||
|
tags.append(year)
|
||||||
|
tags_text = "\n".join(tags)
|
||||||
|
else: # soulseek
|
||||||
|
# For Soulseek: artist, album, title, track
|
||||||
|
tags = []
|
||||||
|
if result.get("artist"):
|
||||||
|
tags.append(result["artist"])
|
||||||
|
if result.get("album"):
|
||||||
|
tags.append(result["album"])
|
||||||
|
if result.get("track"):
|
||||||
|
tags.append(f"Track {result['track']}")
|
||||||
|
if result.get("title"):
|
||||||
|
tags.append(result["title"])
|
||||||
|
tags_text = "\n".join(tags)
|
||||||
|
|
||||||
|
self.tags_textarea.text = tags_text
|
||||||
|
logger.info(f"[search-modal] Populated tags textarea from result")
|
||||||
|
|
||||||
|
async def _download_book(self, result: dict) -> None:
|
||||||
|
"""Download a book from OpenLibrary using unified downloader."""
|
||||||
|
try:
|
||||||
|
from helper.unified_book_downloader import UnifiedBookDownloader
|
||||||
|
from config import load_config
|
||||||
|
|
||||||
|
logger.info(f"[search-modal] Starting download for: {result.get('title')}")
|
||||||
|
|
||||||
|
config = load_config()
|
||||||
|
downloader = UnifiedBookDownloader(config=config)
|
||||||
|
|
||||||
|
# Get download options for this book
|
||||||
|
options = downloader.get_download_options(result)
|
||||||
|
|
||||||
|
if not options['methods']:
|
||||||
|
logger.warning(f"[search-modal] No download methods available for: {result.get('title')}")
|
||||||
|
# Could show a modal dialog here
|
||||||
|
return
|
||||||
|
|
||||||
|
# For now, use the first available method (we could show a dialog to choose)
|
||||||
|
method = options['methods'][0]
|
||||||
|
logger.info(f"[search-modal] Using download method: {method.get('label')}")
|
||||||
|
|
||||||
|
# Perform the download
|
||||||
|
success, message = await downloader.download_book(method)
|
||||||
|
|
||||||
|
if success:
|
||||||
|
logger.info(f"[search-modal] Download successful: {message}")
|
||||||
|
# Could show success dialog
|
||||||
|
else:
|
||||||
|
logger.warning(f"[search-modal] Download failed: {message}")
|
||||||
|
# Could show error dialog
|
||||||
|
|
||||||
|
downloader.close()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[search-modal] Download error: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def action_search_focused(self) -> None:
|
||||||
|
"""Action for Enter key - only search if search input is focused."""
|
||||||
|
if self.search_input and self.search_input.has_focus and not self.is_searching:
|
||||||
|
asyncio.create_task(self._perform_search())
|
||||||
|
|
||||||
|
def action_scrape_tags(self) -> None:
|
||||||
|
"""Action for Ctrl+T - populate tags from selected result."""
|
||||||
|
if self.current_results and self.results_table and self.results_table.row_count > 0:
|
||||||
|
try:
|
||||||
|
selected_row = self.results_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.current_results):
|
||||||
|
result = self.current_results[selected_row]
|
||||||
|
self._populate_tags_from_result(result)
|
||||||
|
logger.info(f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"[search-modal] Ctrl+T: Invalid row index {selected_row}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[search-modal] Ctrl+T error: {e}")
|
||||||
|
else:
|
||||||
|
logger.warning("[search-modal] Ctrl+T: No results selected")
|
||||||
|
|
||||||
|
def action_cancel(self) -> None:
|
||||||
|
"""Action for Escape key - close modal."""
|
||||||
|
self.dismiss(None)
|
||||||
|
|
||||||
|
def on_input_submitted(self, event: Input.Submitted) -> None:
|
||||||
|
"""Handle Enter key in search input - only trigger search here."""
|
||||||
|
if event.input.id == "search-input":
|
||||||
|
if not self.is_searching:
|
||||||
|
asyncio.create_task(self._perform_search())
|
||||||
121
TUI/modalscreen/search.tcss
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
/* Search Modal Screen Styling */
|
||||||
|
|
||||||
|
SearchModal {
|
||||||
|
align: center middle;
|
||||||
|
}
|
||||||
|
|
||||||
|
Screen {
|
||||||
|
layout: vertical;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-container {
|
||||||
|
width: 140;
|
||||||
|
height: 40;
|
||||||
|
background: $panel;
|
||||||
|
border: solid $primary;
|
||||||
|
layout: vertical;
|
||||||
|
}
|
||||||
|
|
||||||
|
Static#search-title {
|
||||||
|
height: 3;
|
||||||
|
dock: top;
|
||||||
|
text-align: center;
|
||||||
|
text-style: bold;
|
||||||
|
color: $accent;
|
||||||
|
background: $boost;
|
||||||
|
padding: 1 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-controls {
|
||||||
|
height: auto;
|
||||||
|
layout: horizontal;
|
||||||
|
padding: 1;
|
||||||
|
border: solid $primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
#source-select {
|
||||||
|
width: 20;
|
||||||
|
margin-right: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-input {
|
||||||
|
width: 1fr;
|
||||||
|
margin-right: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-button {
|
||||||
|
width: 12;
|
||||||
|
}
|
||||||
|
|
||||||
|
#results-table {
|
||||||
|
height: 1fr;
|
||||||
|
border: solid $primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable {
|
||||||
|
border: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable > .datatable--header {
|
||||||
|
background: $boost;
|
||||||
|
color: $accent;
|
||||||
|
text-style: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable > .datatable--cursor-row {
|
||||||
|
background: $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#bottom-controls {
|
||||||
|
height: auto;
|
||||||
|
layout: horizontal;
|
||||||
|
padding: 1;
|
||||||
|
border: solid $primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
#tags-column {
|
||||||
|
width: 1fr;
|
||||||
|
layout: vertical;
|
||||||
|
padding-right: 1;
|
||||||
|
height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
#result-tags-textarea {
|
||||||
|
height: 10;
|
||||||
|
width: 1fr;
|
||||||
|
border: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#source-submit-column {
|
||||||
|
width: 20;
|
||||||
|
layout: vertical;
|
||||||
|
padding-left: 1;
|
||||||
|
height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
#library-source-select {
|
||||||
|
width: 1fr;
|
||||||
|
margin-bottom: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#submit-button {
|
||||||
|
width: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-buttons {
|
||||||
|
height: 3;
|
||||||
|
dock: bottom;
|
||||||
|
layout: horizontal;
|
||||||
|
padding: 1;
|
||||||
|
border: solid $primary;
|
||||||
|
align: center middle;
|
||||||
|
}
|
||||||
|
|
||||||
|
#select-button {
|
||||||
|
width: 12;
|
||||||
|
margin-right: 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#cancel-button {
|
||||||
|
width: 12;
|
||||||
|
}
|
||||||
585
TUI/modalscreen/workers.py
Normal file
@@ -0,0 +1,585 @@
|
|||||||
|
"""Workers modal screen for monitoring and managing background tasks."""
|
||||||
|
|
||||||
|
from textual.app import ComposeResult
|
||||||
|
from textual.screen import ModalScreen
|
||||||
|
from textual.containers import Horizontal, Vertical
|
||||||
|
from textual.widgets import Static, Button, DataTable, TextArea
|
||||||
|
from textual.binding import Binding
|
||||||
|
from textual.message import Message
|
||||||
|
import logging
|
||||||
|
from typing import Optional, Dict, List, Any
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add parent directory to path for imports
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class WorkersModal(ModalScreen):
|
||||||
|
"""Modal screen for monitoring running and finished workers."""
|
||||||
|
|
||||||
|
BINDINGS = [
|
||||||
|
Binding("escape", "cancel", "Cancel"),
|
||||||
|
]
|
||||||
|
|
||||||
|
CSS_PATH = "workers.tcss"
|
||||||
|
|
||||||
|
class WorkerUpdated(Message):
|
||||||
|
"""Posted when worker list is updated."""
|
||||||
|
def __init__(self, workers: List[Dict[str, Any]]) -> None:
|
||||||
|
self.workers = workers
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
class WorkerCancelled(Message):
|
||||||
|
"""Posted when user cancels a worker."""
|
||||||
|
def __init__(self, worker_id: str) -> None:
|
||||||
|
self.worker_id = worker_id
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def __init__(self, app_instance=None):
|
||||||
|
"""Initialize the workers modal.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app_instance: Reference to the hub app for accessing worker info
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.app_instance = app_instance
|
||||||
|
self.running_table: Optional[DataTable] = None
|
||||||
|
self.finished_table: Optional[DataTable] = None
|
||||||
|
self.stdout_display: Optional[TextArea] = None
|
||||||
|
self.running_workers: List[Dict[str, Any]] = []
|
||||||
|
self.finished_workers: List[Dict[str, Any]] = []
|
||||||
|
self.selected_worker_id: Optional[str] = None
|
||||||
|
self.show_running = False # Start with finished tab
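# The modal only duck-types against app_instance; the methods it probes for
# (via hasattr/getattr below) are: get_running_workers(), get_finished_workers(),
# stop_worker(id), stop_all_workers(), clear_finished_worker(id),
# clear_all_finished_workers(), and a worker_manager exposing get_stdout(),
# get_worker() and get_worker_events(). Any of them may be absent.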
|
||||||
|
|
||||||
|
def compose(self) -> ComposeResult:
|
||||||
|
"""Create child widgets for the workers modal."""
|
||||||
|
with Vertical(id="workers-container"):
|
||||||
|
# Title with toggle buttons
|
||||||
|
with Horizontal(id="workers-title-bar"):
|
||||||
|
yield Static("Workers Monitor", id="workers-title")
|
||||||
|
yield Button("Running", id="toggle-running-btn", variant="primary")
|
||||||
|
yield Button("Finished", id="toggle-finished-btn", variant="default")
|
||||||
|
|
||||||
|
# Running tab content (initially hidden)
|
||||||
|
with Vertical(id="running-section"):
|
||||||
|
self.running_table = DataTable(id="running-table")
|
||||||
|
yield self.running_table
|
||||||
|
|
||||||
|
with Horizontal(id="running-controls"):
|
||||||
|
yield Button("Refresh", id="running-refresh-btn", variant="primary")
|
||||||
|
yield Button("Stop Selected", id="running-stop-btn", variant="warning")
|
||||||
|
yield Button("Stop All", id="running-stop-all-btn", variant="error")
|
||||||
|
|
||||||
|
# Finished tab content (initially visible)
|
||||||
|
with Vertical(id="finished-section"):
|
||||||
|
self.finished_table = DataTable(id="finished-table")
|
||||||
|
yield self.finished_table
|
||||||
|
|
||||||
|
with Horizontal(id="finished-controls"):
|
||||||
|
yield Button("Refresh", id="finished-refresh-btn", variant="primary")
|
||||||
|
yield Button("Clear Selected", id="finished-clear-btn", variant="warning")
|
||||||
|
yield Button("Clear All", id="finished-clear-all-btn", variant="error")
|
||||||
|
|
||||||
|
# Shared textarea for displaying worker logs
|
||||||
|
with Vertical(id="logs-section"):
|
||||||
|
yield Static("Worker Logs:", id="logs-label")
|
||||||
|
self.stdout_display = TextArea(id="stdout-display", read_only=True)
|
||||||
|
yield self.stdout_display
|
||||||
|
|
||||||
|
with Horizontal(id="workers-buttons"):
|
||||||
|
yield Button("Close", id="close-btn", variant="primary")
|
||||||
|
|
||||||
|
def on_mount(self) -> None:
|
||||||
|
"""Set up the tables and load worker data."""
|
||||||
|
# Set up running workers table
|
||||||
|
if self.running_table:
|
||||||
|
self.running_table.add_columns(
|
||||||
|
"ID",
|
||||||
|
"Type",
|
||||||
|
"Status",
|
||||||
|
"Pipe",
|
||||||
|
"Progress",
|
||||||
|
"Started",
|
||||||
|
"Details"
|
||||||
|
)
|
||||||
|
self.running_table.zebra_stripes = True
|
||||||
|
|
||||||
|
# Set up finished workers table
|
||||||
|
if self.finished_table:
|
||||||
|
self.finished_table.add_columns(
|
||||||
|
"ID",
|
||||||
|
"Type",
|
||||||
|
"Result",
|
||||||
|
"Pipe",
|
||||||
|
"Started",
|
||||||
|
"Completed",
|
||||||
|
"Duration",
|
||||||
|
"Details"
|
||||||
|
)
|
||||||
|
self.finished_table.zebra_stripes = True
|
||||||
|
|
||||||
|
# Set initial view (show finished by default)
|
||||||
|
self._update_view_visibility()
|
||||||
|
|
||||||
|
# Load initial data
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
# Don't set up periodic refresh - it was causing issues with stdout display
|
||||||
|
# Users can click the Refresh button to update manually
|
||||||
|
|
||||||
|
def refresh_workers(self) -> None:
|
||||||
|
"""Refresh the workers data from app instance."""
|
||||||
|
try:
|
||||||
|
if not self.app_instance:
|
||||||
|
logger.warning("[workers-modal] No app instance provided")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get running workers from app instance
|
||||||
|
# This assumes the app has a get_running_workers() method
|
||||||
|
if hasattr(self.app_instance, 'get_running_workers'):
|
||||||
|
self.running_workers = self.app_instance.get_running_workers()
|
||||||
|
else:
|
||||||
|
self.running_workers = []
|
||||||
|
|
||||||
|
# Get finished workers from app instance
|
||||||
|
if hasattr(self.app_instance, 'get_finished_workers'):
|
||||||
|
self.finished_workers = self.app_instance.get_finished_workers()
|
||||||
|
if self.finished_workers:
|
||||||
|
logger.info(f"[workers-modal-refresh] Got {len(self.finished_workers)} finished workers from app")
|
||||||
|
# Log the keys in the first worker to verify structure
|
||||||
|
if isinstance(self.finished_workers[0], dict):
|
||||||
|
logger.info(f"[workers-modal-refresh] First worker keys: {list(self.finished_workers[0].keys())}")
|
||||||
|
logger.info(f"[workers-modal-refresh] First worker: {self.finished_workers[0]}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"[workers-modal-refresh] First worker is not a dict: {type(self.finished_workers[0])}")
|
||||||
|
else:
|
||||||
|
self.finished_workers = []
|
||||||
|
|
||||||
|
# Update tables
|
||||||
|
self._update_running_table()
|
||||||
|
self._update_finished_table()
|
||||||
|
|
||||||
|
logger.info(f"[workers-modal] Refreshed: {len(self.running_workers)} running, {len(self.finished_workers)} finished")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error refreshing workers: {e}")
|
||||||
|
|
||||||
|
def _update_view_visibility(self) -> None:
|
||||||
|
"""Toggle visibility between running and finished views."""
|
||||||
|
try:
|
||||||
|
running_section = self.query_one("#running-section", Vertical)
|
||||||
|
finished_section = self.query_one("#finished-section", Vertical)
|
||||||
|
toggle_running_btn = self.query_one("#toggle-running-btn", Button)
|
||||||
|
toggle_finished_btn = self.query_one("#toggle-finished-btn", Button)
|
||||||
|
|
||||||
|
if self.show_running:
|
||||||
|
running_section.display = True
|
||||||
|
finished_section.display = False
|
||||||
|
toggle_running_btn.variant = "primary"
|
||||||
|
toggle_finished_btn.variant = "default"
|
||||||
|
logger.debug("[workers-modal] Switched to Running view")
|
||||||
|
else:
|
||||||
|
running_section.display = False
|
||||||
|
finished_section.display = True
|
||||||
|
toggle_running_btn.variant = "default"
|
||||||
|
toggle_finished_btn.variant = "primary"
|
||||||
|
logger.debug("[workers-modal] Switched to Finished view")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error updating view visibility: {e}")
|
||||||
|
|
||||||
|
def _update_running_table(self) -> None:
|
||||||
|
"""Update the running workers table."""
|
||||||
|
try:
|
||||||
|
if not self.running_table:
|
||||||
|
logger.error("[workers-modal] Running table not initialized")
|
||||||
|
return
|
||||||
|
|
||||||
|
self.running_table.clear()
|
||||||
|
|
||||||
|
if not self.running_workers:
|
||||||
|
self.running_table.add_row("---", "---", "---", "---", "---", "---", "No workers running")
|
||||||
|
logger.debug(f"[workers-modal] No running workers to display")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.debug(f"[workers-modal] Updating running table with {len(self.running_workers)} workers")
|
||||||
|
|
||||||
|
for idx, worker_info in enumerate(self.running_workers):
|
||||||
|
try:
|
||||||
|
worker_id = worker_info.get('id', 'unknown')
|
||||||
|
worker_type = worker_info.get('type', 'unknown')
|
||||||
|
status = worker_info.get('status', 'running')
|
||||||
|
progress = worker_info.get('progress', '')
|
||||||
|
started = worker_info.get('started', '')
|
||||||
|
details = worker_info.get('details', '')
|
||||||
|
pipe = worker_info.get('pipe', '')
|
||||||
|
|
||||||
|
# Ensure values are strings
|
||||||
|
worker_id = str(worker_id) if worker_id else 'unknown'
|
||||||
|
worker_type = str(worker_type) if worker_type else 'unknown'
|
||||||
|
status = str(status) if status else 'running'
|
||||||
|
progress = str(progress) if progress else '---'
|
||||||
|
started = str(started) if started else '---'
|
||||||
|
details = str(details) if details else '---'
|
||||||
|
pipe_display = self._summarize_pipe(pipe)
|
||||||
|
|
||||||
|
# Truncate long strings
|
||||||
|
progress = progress[:20]
|
||||||
|
started = started[:19]
|
||||||
|
details = details[:30]
|
||||||
|
pipe_display = pipe_display[:40]
|
||||||
|
|
||||||
|
self.running_table.add_row(
|
||||||
|
worker_id[:8],
|
||||||
|
worker_type[:15],
|
||||||
|
status[:10],
|
||||||
|
pipe_display,
|
||||||
|
progress,
|
||||||
|
started,
|
||||||
|
details
|
||||||
|
)
|
||||||
|
|
||||||
|
if idx == 0: # Log first entry
|
||||||
|
logger.debug(f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}")
|
||||||
|
except Exception as row_error:
|
||||||
|
logger.error(f"[workers-modal] Error adding running row {idx}: {row_error}", exc_info=True)
|
||||||
|
|
||||||
|
logger.debug(f"[workers-modal] Updated running table with {len(self.running_workers)} workers")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error updating running table: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def _update_finished_table(self) -> None:
|
||||||
|
"""Update the finished workers table."""
|
||||||
|
try:
|
||||||
|
if not self.finished_table:
|
||||||
|
logger.error("[workers-modal] Finished table not initialized")
|
||||||
|
return
|
||||||
|
|
||||||
|
self.finished_table.clear()
|
||||||
|
|
||||||
|
if not self.finished_workers:
|
||||||
|
self.finished_table.add_row("---", "---", "---", "---", "---", "---", "---", "No finished workers")
|
||||||
|
logger.debug(f"[workers-modal] No finished workers to display")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers")
|
||||||
|
added_count = 0
|
||||||
|
error_count = 0
|
||||||
|
|
||||||
|
for idx, worker_info in enumerate(self.finished_workers):
|
||||||
|
try:
|
||||||
|
worker_id = worker_info.get('id', 'unknown')
|
||||||
|
worker_type = worker_info.get('type', 'unknown')
|
||||||
|
result = worker_info.get('result', 'unknown')
|
||||||
|
completed = worker_info.get('completed', '')
|
||||||
|
duration = worker_info.get('duration', '')
|
||||||
|
details = worker_info.get('details', '')
|
||||||
|
pipe = worker_info.get('pipe', '')
|
||||||
|
started = worker_info.get('started', '')
|
||||||
|
|
||||||
|
# Ensure values are strings
|
||||||
|
worker_id = str(worker_id) if worker_id else 'unknown'
|
||||||
|
worker_type = str(worker_type) if worker_type else 'unknown'
|
||||||
|
result = str(result) if result else 'unknown'
|
||||||
|
completed = str(completed) if completed else '---'
|
||||||
|
duration = str(duration) if duration else '---'
|
||||||
|
details = str(details) if details else '---'
|
||||||
|
started = str(started) if started else '---'
|
||||||
|
pipe_display = self._summarize_pipe(pipe)
|
||||||
|
|
||||||
|
# Truncate long strings
|
||||||
|
result = result[:15]
|
||||||
|
completed = completed[:19]
|
||||||
|
started = started[:19]
|
||||||
|
duration = duration[:10]
|
||||||
|
details = details[:30]
|
||||||
|
pipe_display = pipe_display[:40]
|
||||||
|
|
||||||
|
self.finished_table.add_row(
|
||||||
|
worker_id[:8],
|
||||||
|
worker_type[:15],
|
||||||
|
result,
|
||||||
|
pipe_display,
|
||||||
|
started,
|
||||||
|
completed,
|
||||||
|
duration,
|
||||||
|
details
|
||||||
|
)
|
||||||
|
added_count += 1
|
||||||
|
|
||||||
|
except Exception as row_error:
|
||||||
|
error_count += 1
|
||||||
|
logger.error(f"[workers-modal-update] Error adding finished row {idx}: {row_error}", exc_info=True)
|
||||||
|
|
||||||
|
logger.info(f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})")
|
||||||
|
logger.debug(f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error updating finished table: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
|
||||||
|
"""Handle row highlight in tables - display stdout."""
|
||||||
|
try:
|
||||||
|
logger.info(f"[workers-modal] Row highlighted, cursor_row: {event.cursor_row}")
|
||||||
|
|
||||||
|
# Get the selected worker from the correct table
|
||||||
|
workers_list = None
|
||||||
|
if event.control == self.running_table:
|
||||||
|
workers_list = self.running_workers
|
||||||
|
logger.debug(f"[workers-modal] Highlighted in running table")
|
||||||
|
elif event.control == self.finished_table:
|
||||||
|
workers_list = self.finished_workers
|
||||||
|
logger.debug(f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"[workers-modal] Unknown table: {event.control}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get the worker at this row
|
||||||
|
if workers_list and 0 <= event.cursor_row < len(workers_list):
|
||||||
|
worker = workers_list[event.cursor_row]
|
||||||
|
worker_id = worker.get('id', '')
|
||||||
|
logger.info(f"[workers-modal] Highlighted worker: {worker_id}")
|
||||||
|
|
||||||
|
if worker_id:
|
||||||
|
self.selected_worker_id = worker_id
|
||||||
|
# Display the stdout
|
||||||
|
self._update_stdout_display(worker_id, worker)
|
||||||
|
else:
|
||||||
|
logger.warning(f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error handling row highlight: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def on_data_table_cell_highlighted(self, event: DataTable.CellHighlighted) -> None:
|
||||||
|
"""Handle cell highlight in tables - display stdout (backup for row selection)."""
|
||||||
|
try:
|
||||||
|
# CellHighlighted has coordinate (row, column) not cursor_row
|
||||||
|
cursor_row = event.coordinate.row
|
||||||
|
logger.debug(f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}")
|
||||||
|
|
||||||
|
# Get the selected worker from the correct table
|
||||||
|
workers_list = None
|
||||||
|
if event.data_table == self.running_table:
|
||||||
|
workers_list = self.running_workers
|
||||||
|
logger.debug(f"[workers-modal] Cell highlighted in running table")
|
||||||
|
elif event.data_table == self.finished_table:
|
||||||
|
workers_list = self.finished_workers
|
||||||
|
logger.debug(f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}")
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get the worker at this row
|
||||||
|
if workers_list and 0 <= cursor_row < len(workers_list):
|
||||||
|
worker = workers_list[cursor_row]
|
||||||
|
worker_id = worker.get('id', '')
|
||||||
|
|
||||||
|
if worker_id and worker_id != self.selected_worker_id:
|
||||||
|
logger.info(f"[workers-modal] Cell-highlighted worker: {worker_id}")
|
||||||
|
self.selected_worker_id = worker_id
|
||||||
|
# Display the stdout
|
||||||
|
self._update_stdout_display(worker_id, worker)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[workers-modal] Error handling cell highlight: {e}")
|
||||||
|
|
||||||
|
def _update_stdout_display(self, worker_id: str, worker: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
"""Update the stdout textarea with logs from the selected worker."""
|
||||||
|
try:
|
||||||
|
if not self.stdout_display:
|
||||||
|
logger.error("[workers-modal] stdout_display not initialized")
|
||||||
|
return
|
||||||
|
logger.debug(f"[workers-modal] Updating stdout display for worker: {worker_id}")
|
||||||
|
worker_data = worker or self._locate_worker(worker_id)
|
||||||
|
stdout_text = self._resolve_worker_stdout(worker_id, worker_data)
|
||||||
|
pipe_text = self._resolve_worker_pipe(worker_id, worker_data)
|
||||||
|
events = self._get_worker_events(worker_id)
|
||||||
|
timeline_text = self._format_worker_timeline(events)
|
||||||
|
sections = []
|
||||||
|
if pipe_text:
|
||||||
|
sections.append(f"Pipe:\n{pipe_text}")
|
||||||
|
if timeline_text:
|
||||||
|
sections.append("Timeline:\n" + timeline_text)
|
||||||
|
logs_body = (stdout_text or "").strip()
|
||||||
|
sections.append("Logs:\n" + (logs_body if logs_body else "(no logs recorded)"))
|
||||||
|
combined_text = "\n\n".join(sections)
|
||||||
|
logger.debug(f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})")
|
||||||
|
self.stdout_display.text = combined_text
|
||||||
|
if len(combined_text) > 10:
|
||||||
|
try:
|
||||||
|
# Jump the cursor to the last line so the newest output is visible
self.stdout_display.cursor_location = (len(combined_text.splitlines()) - 1, 0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(f"[workers-modal] Updated stdout display successfully")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error updating stdout display: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def _locate_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||||||
|
for worker in self.running_workers or []:
|
||||||
|
if isinstance(worker, dict) and worker.get('id') == worker_id:
|
||||||
|
return worker
|
||||||
|
for worker in self.finished_workers or []:
|
||||||
|
if isinstance(worker, dict) and worker.get('id') == worker_id:
|
||||||
|
return worker
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _resolve_worker_stdout(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
|
||||||
|
if worker and worker.get('stdout'):
|
||||||
|
return worker.get('stdout', '') or ''
|
||||||
|
manager = getattr(self.app_instance, 'worker_manager', None)
|
||||||
|
if manager:
|
||||||
|
try:
|
||||||
|
return manager.get_stdout(worker_id) or ''
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(f"[workers-modal] Could not fetch stdout for {worker_id}: {exc}")
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def _resolve_worker_pipe(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
|
||||||
|
if worker and worker.get('pipe'):
|
||||||
|
return str(worker.get('pipe'))
|
||||||
|
record = self._fetch_worker_record(worker_id)
|
||||||
|
if record and record.get('pipe'):
|
||||||
|
return str(record.get('pipe'))
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def _fetch_worker_record(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||||||
|
manager = getattr(self.app_instance, 'worker_manager', None)
|
||||||
|
if not manager:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return manager.get_worker(worker_id)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(f"[workers-modal] Could not fetch worker record {worker_id}: {exc}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_worker_events(self, worker_id: str, limit: int = 250) -> List[Dict[str, Any]]:
|
||||||
|
manager = getattr(self.app_instance, 'worker_manager', None)
|
||||||
|
if not manager:
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
return manager.get_worker_events(worker_id, limit=limit)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(f"[workers-modal] Could not fetch worker events {worker_id}: {exc}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _format_worker_timeline(self, events: List[Dict[str, Any]]) -> str:
|
||||||
|
if not events:
|
||||||
|
return ""
|
||||||
|
lines: List[str] = []
|
||||||
|
for event in events:
|
||||||
|
timestamp = self._format_event_timestamp(event.get('created_at'))
|
||||||
|
label = (event.get('event_type') or '').upper() or 'EVENT'
|
||||||
|
channel = (event.get('channel') or '').upper()
|
||||||
|
if channel and channel not in label:
|
||||||
|
label = f"{label}/{channel}"
|
||||||
|
step = event.get('step') or ''
|
||||||
|
message = event.get('message') or ''
|
||||||
|
prefix = ''
|
||||||
|
if event.get('event_type') == 'step' and step:
|
||||||
|
prefix = f"{step} :: "
|
||||||
|
elif step and step not in message:
|
||||||
|
prefix = f"{step} :: "
|
||||||
|
formatted_message = self._format_message_block(message)
|
||||||
|
lines.append(f"[{timestamp}] {label}: {prefix}{formatted_message}")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
def _format_event_timestamp(self, raw_timestamp: Any) -> str:
|
||||||
|
if not raw_timestamp:
|
||||||
|
return "--:--:--"
|
||||||
|
text = str(raw_timestamp)
|
||||||
|
if "T" in text:
|
||||||
|
time_part = text.split("T", 1)[1]
|
||||||
|
elif " " in text:
|
||||||
|
time_part = text.split(" ", 1)[1]
|
||||||
|
else:
|
||||||
|
time_part = text
|
||||||
|
return time_part[:8] if len(time_part) >= 8 else time_part
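# e.g. "2024-05-01T12:34:56.789012" and "2024-05-01 12:34:56" both reduce to
# "12:34:56"; a bare "12:34" is returned unchanged.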
|
||||||
|
|
||||||
|
def _format_message_block(self, message: str) -> str:
|
||||||
|
clean = (message or '').strip()
|
||||||
|
if not clean:
|
||||||
|
return "(empty)"
|
||||||
|
lines = clean.splitlines()
|
||||||
|
if len(lines) == 1:
|
||||||
|
return lines[0]
|
||||||
|
head, *rest = lines
|
||||||
|
indented = "\n".join(f" {line}" for line in rest)
|
||||||
|
return f"{head}\n{indented}"
|
||||||
|
|
||||||
|
def _summarize_pipe(self, pipe_value: Any, limit: int = 40) -> str:
|
||||||
|
text = str(pipe_value or '').strip()
|
||||||
|
if not text:
|
||||||
|
return "(none)"
|
||||||
|
return text if len(text) <= limit else text[: limit - 3] + '...'
|
||||||
|
|
||||||
|
def on_button_pressed(self, event: Button.Pressed) -> None:
|
||||||
|
"""Handle button presses."""
|
||||||
|
button_id = event.button.id
|
||||||
|
|
||||||
|
try:
|
||||||
|
if button_id == "toggle-running-btn":
|
||||||
|
self.show_running = True
|
||||||
|
self._update_view_visibility()
|
||||||
|
return
|
||||||
|
|
||||||
|
elif button_id == "toggle-finished-btn":
|
||||||
|
self.show_running = False
|
||||||
|
self._update_view_visibility()
|
||||||
|
return
|
||||||
|
|
||||||
|
if button_id == "running-refresh-btn":
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
elif button_id == "running-stop-btn":
|
||||||
|
# Stop selected running worker
|
||||||
|
if self.running_table and self.running_table.row_count > 0:
|
||||||
|
try:
|
||||||
|
selected_row = self.running_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.running_workers):
|
||||||
|
worker = self.running_workers[selected_row]
|
||||||
|
worker_id = worker.get('id')
|
||||||
|
if self.app_instance and hasattr(self.app_instance, 'stop_worker'):
|
||||||
|
self.app_instance.stop_worker(worker_id)
|
||||||
|
logger.info(f"[workers-modal] Stopped worker: {worker_id}")
|
||||||
|
self.refresh_workers()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error stopping worker: {e}")
|
||||||
|
|
||||||
|
elif button_id == "running-stop-all-btn":
|
||||||
|
# Stop all running workers
|
||||||
|
if self.app_instance and hasattr(self.app_instance, 'stop_all_workers'):
|
||||||
|
self.app_instance.stop_all_workers()
|
||||||
|
logger.info("[workers-modal] Stopped all workers")
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
elif button_id == "finished-refresh-btn":
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
elif button_id == "finished-clear-btn":
|
||||||
|
# Clear selected finished worker
|
||||||
|
if self.finished_table and self.finished_table.row_count > 0:
|
||||||
|
try:
|
||||||
|
selected_row = self.finished_table.cursor_row
|
||||||
|
if 0 <= selected_row < len(self.finished_workers):
|
||||||
|
worker = self.finished_workers[selected_row]
|
||||||
|
worker_id = worker.get('id')
|
||||||
|
if self.app_instance and hasattr(self.app_instance, 'clear_finished_worker'):
|
||||||
|
self.app_instance.clear_finished_worker(worker_id)
|
||||||
|
logger.info(f"[workers-modal] Cleared worker: {worker_id}")
|
||||||
|
self.refresh_workers()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error clearing worker: {e}")
|
||||||
|
|
||||||
|
elif button_id == "finished-clear-all-btn":
|
||||||
|
# Clear all finished workers
|
||||||
|
if self.app_instance and hasattr(self.app_instance, 'clear_all_finished_workers'):
|
||||||
|
self.app_instance.clear_all_finished_workers()
|
||||||
|
logger.info("[workers-modal] Cleared all finished workers")
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
elif button_id == "close-btn":
|
||||||
|
self.dismiss(None)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[workers-modal] Error in on_button_pressed: {e}")
|
||||||
|
|
||||||
|
def action_cancel(self) -> None:
|
||||||
|
"""Action for Escape key - close modal."""
|
||||||
|
self.dismiss(None)
|
||||||
119
TUI/modalscreen/workers.tcss
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
/* Workers Modal Stylesheet */
|
||||||
|
|
||||||
|
Screen {
|
||||||
|
background: $surface;
|
||||||
|
color: $text;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-container {
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
layout: vertical;
|
||||||
|
background: $panel;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-title-bar {
|
||||||
|
dock: top;
|
||||||
|
height: 3;
|
||||||
|
layout: horizontal;
|
||||||
|
background: $boost;
|
||||||
|
border: solid $accent;
|
||||||
|
padding: 0 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-title {
|
||||||
|
width: 1fr;
|
||||||
|
height: 100%;
|
||||||
|
content-align-vertical: middle;
|
||||||
|
color: $text;
|
||||||
|
text-style: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
#toggle-running-btn,
|
||||||
|
#toggle-finished-btn {
|
||||||
|
width: auto;
|
||||||
|
height: 100%;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#running-section,
|
||||||
|
#finished-section {
|
||||||
|
width: 100%;
|
||||||
|
height: 40%;
|
||||||
|
layout: vertical;
|
||||||
|
border: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#running-table,
|
||||||
|
#finished-table {
|
||||||
|
width: 100%;
|
||||||
|
height: 1fr;
|
||||||
|
border: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#running-controls,
|
||||||
|
#finished-controls {
|
||||||
|
width: 100%;
|
||||||
|
height: auto;
|
||||||
|
min-height: 3;
|
||||||
|
layout: horizontal;
|
||||||
|
background: $boost;
|
||||||
|
padding: 1;
|
||||||
|
border-top: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#running-controls Button,
|
||||||
|
#finished-controls Button {
|
||||||
|
margin-right: 1;
|
||||||
|
min-width: 15;
|
||||||
|
}
|
||||||
|
|
||||||
|
#logs-label {
|
||||||
|
height: 1;
|
||||||
|
margin: 0 1;
|
||||||
|
text-style: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
#logs-section {
|
||||||
|
width: 100%;
|
||||||
|
height: 1fr;
|
||||||
|
layout: vertical;
|
||||||
|
border: solid $accent;
|
||||||
|
background: $panel;
|
||||||
|
}
|
||||||
|
|
||||||
|
#stdout-display {
|
||||||
|
width: 100%;
|
||||||
|
height: 1fr;
|
||||||
|
border: solid $accent;
|
||||||
|
margin: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-buttons {
|
||||||
|
dock: bottom;
|
||||||
|
height: auto;
|
||||||
|
min-height: 3;
|
||||||
|
layout: horizontal;
|
||||||
|
border: solid $accent;
|
||||||
|
padding: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-buttons Button {
|
||||||
|
margin-right: 1;
|
||||||
|
min-width: 15;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable {
|
||||||
|
border: solid $accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable > .datatable--header {
|
||||||
|
background: $boost;
|
||||||
|
color: $text;
|
||||||
|
text-style: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTable > .datatable--cursor {
|
||||||
|
background: $accent;
|
||||||
|
color: $panel;
|
||||||
|
}
|
||||||
356
TUI/pipeline_runner.py
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
"""Pipeline execution utilities for the Textual UI.
|
||||||
|
|
||||||
|
This module mirrors the CLI pipeline behaviour while exposing a class-based
|
||||||
|
interface that the TUI can call. It keeps all pipeline/cmdlet integration in
|
||||||
|
one place so the interface layer stays focused on presentation.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import io
|
||||||
|
import shlex
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable, Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
ROOT_DIR = BASE_DIR.parent
|
||||||
|
for path in (ROOT_DIR, BASE_DIR):
|
||||||
|
str_path = str(path)
|
||||||
|
if str_path not in sys.path:
|
||||||
|
sys.path.insert(0, str_path)
|
||||||
|
|
||||||
|
import pipeline as ctx
|
||||||
|
from cmdlets import REGISTRY
|
||||||
|
from config import get_local_storage_path, load_config
|
||||||
|
from helper.worker_manager import WorkerManager
|
||||||
|
|
||||||
|
try: # Reuse the CLI selection parser instead of reimplementing it.
|
||||||
|
from CLI import _parse_selection_syntax
|
||||||
|
except ImportError: # pragma: no cover - fallback for atypical environments
|
||||||
|
_parse_selection_syntax = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class PipelineStageResult:
|
||||||
|
"""Summary for a single pipeline stage."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
args: Sequence[str]
|
||||||
|
emitted: List[Any] = field(default_factory=list)
|
||||||
|
status: str = "pending"
|
||||||
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class PipelineRunResult:
|
||||||
|
"""Aggregate result for a pipeline run."""
|
||||||
|
|
||||||
|
pipeline: str
|
||||||
|
success: bool
|
||||||
|
stages: List[PipelineStageResult] = field(default_factory=list)
|
||||||
|
emitted: List[Any] = field(default_factory=list)
|
||||||
|
stdout: str = ""
|
||||||
|
stderr: str = ""
|
||||||
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
def to_summary(self) -> Dict[str, Any]:
|
||||||
|
"""Provide a JSON-friendly representation for logging or UI."""
|
||||||
|
return {
|
||||||
|
"pipeline": self.pipeline,
|
||||||
|
"success": self.success,
|
||||||
|
"error": self.error,
|
||||||
|
"stages": [
|
||||||
|
{
|
||||||
|
"name": stage.name,
|
||||||
|
"status": stage.status,
|
||||||
|
"error": stage.error,
|
||||||
|
"emitted": len(stage.emitted),
|
||||||
|
}
|
||||||
|
for stage in self.stages
|
||||||
|
],
|
||||||
|
}
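# A successful two-stage run (hypothetical cmdlet names) summarises roughly as:
#     {"pipeline": "search-file foo | add-tag bar", "success": True, "error": None,
#      "stages": [{"name": "search-file", "status": "completed", "error": None, "emitted": 12},
#                 {"name": "add-tag", "status": "completed", "error": None, "emitted": 12}]}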
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineExecutor:
|
||||||
|
"""Thin wrapper over the cmdlet registry + pipeline context."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
config: Optional[Dict[str, Any]] = None,
|
||||||
|
worker_manager: Optional[WorkerManager] = None,
|
||||||
|
) -> None:
|
||||||
|
self._config = config or load_config()
|
||||||
|
self._worker_manager = worker_manager
|
||||||
|
if self._worker_manager is None:
|
||||||
|
self._worker_manager = self._ensure_worker_manager()
|
||||||
|
if self._worker_manager:
|
||||||
|
self._config["_worker_manager"] = self._worker_manager
|
||||||
|
|
||||||
|
@property
|
||||||
|
def worker_manager(self) -> Optional[WorkerManager]:
|
||||||
|
return self._worker_manager
|
||||||
|
|
||||||
|
def run_pipeline(
|
||||||
|
self,
|
||||||
|
pipeline_text: str,
|
||||||
|
*,
|
||||||
|
on_log: Optional[Callable[[str], None]] = None,
|
||||||
|
) -> PipelineRunResult:
|
||||||
|
"""Execute a pipeline string and return structured results.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pipeline_text: Raw pipeline text entered by the user.
|
||||||
|
on_log: Optional callback that receives human-readable log lines.
|
||||||
|
"""
|
||||||
|
normalized = pipeline_text.strip()
|
||||||
|
result = PipelineRunResult(pipeline=normalized, success=False)
|
||||||
|
if not normalized:
|
||||||
|
result.error = "Pipeline is empty"
|
||||||
|
return result
|
||||||
|
|
||||||
|
tokens = self._tokenize(normalized)
|
||||||
|
stages = self._split_stages(tokens)
|
||||||
|
if not stages:
|
||||||
|
result.error = "Pipeline contains no stages"
|
||||||
|
return result
|
||||||
|
|
||||||
|
ctx.reset()
|
||||||
|
ctx.set_current_command_text(normalized)
|
||||||
|
|
||||||
|
stdout_buffer = io.StringIO()
|
||||||
|
stderr_buffer = io.StringIO()
|
||||||
|
piped_result: Any = None
|
||||||
|
worker_session = self._start_worker_session(normalized)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
|
||||||
|
stderr_buffer
|
||||||
|
):
|
||||||
|
for index, stage_tokens in enumerate(stages):
|
||||||
|
stage = self._execute_stage(
|
||||||
|
index=index,
|
||||||
|
total=len(stages),
|
||||||
|
stage_tokens=stage_tokens,
|
||||||
|
piped_input=piped_result,
|
||||||
|
on_log=on_log,
|
||||||
|
)
|
||||||
|
result.stages.append(stage)
|
||||||
|
|
||||||
|
if stage.status != "completed":
|
||||||
|
result.error = stage.error or f"Stage {stage.name} failed"
|
||||||
|
return result
|
||||||
|
|
||||||
|
if index == len(stages) - 1:
|
||||||
|
result.emitted = stage.emitted
|
||||||
|
else:
|
||||||
|
piped_result = stage.emitted
|
||||||
|
|
||||||
|
result.success = True
|
||||||
|
return result
|
||||||
|
finally:
|
||||||
|
result.stdout = stdout_buffer.getvalue()
|
||||||
|
result.stderr = stderr_buffer.getvalue()
|
||||||
|
ctx.clear_current_command_text()
|
||||||
|
if worker_session is not None:
|
||||||
|
status = "completed" if result.success else "error"
|
||||||
|
worker_session.finish(status=status, message=result.error or "")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Stage execution helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _execute_stage(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
index: int,
|
||||||
|
total: int,
|
||||||
|
stage_tokens: Sequence[str],
|
||||||
|
piped_input: Any,
|
||||||
|
on_log: Optional[Callable[[str], None]],
|
||||||
|
) -> PipelineStageResult:
|
||||||
|
if not stage_tokens:
|
||||||
|
return PipelineStageResult(name="(empty)", args=[], status="skipped")
|
||||||
|
|
||||||
|
cmd_name = stage_tokens[0].replace("_", "-").lower()
|
||||||
|
stage_args = stage_tokens[1:]
|
||||||
|
stage = PipelineStageResult(name=cmd_name, args=stage_args)
|
||||||
|
|
||||||
|
if cmd_name.startswith("@"):
|
||||||
|
return self._apply_selection_stage(
|
||||||
|
token=cmd_name,
|
||||||
|
stage=stage,
|
||||||
|
piped_input=piped_input,
|
||||||
|
on_log=on_log,
|
||||||
|
)
|
||||||
|
|
||||||
|
cmd_fn = REGISTRY.get(cmd_name)
|
||||||
|
if not cmd_fn:
|
||||||
|
stage.status = "failed"
|
||||||
|
stage.error = f"Unknown command: {cmd_name}"
|
||||||
|
return stage
|
||||||
|
|
||||||
|
pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
|
||||||
|
ctx.set_stage_context(pipeline_ctx)
|
||||||
|
ctx.set_active(True)
|
||||||
|
ctx.set_last_stage(index == total - 1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return_code = cmd_fn(piped_input, list(stage_args), self._config)
|
||||||
|
except Exception as exc: # pragma: no cover - surfaced in UI
|
||||||
|
stage.status = "failed"
|
||||||
|
stage.error = f"{type(exc).__name__}: {exc}"
|
||||||
|
if on_log:
|
||||||
|
on_log(stage.error)
|
||||||
|
return stage
|
||||||
|
finally:
|
||||||
|
ctx.set_stage_context(None)
|
||||||
|
ctx.set_active(False)
|
||||||
|
|
||||||
|
emitted = list(getattr(pipeline_ctx, "emits", []) or [])
|
||||||
|
stage.emitted = emitted
|
||||||
|
|
||||||
|
if return_code != 0:
|
||||||
|
stage.status = "failed"
|
||||||
|
stage.error = f"Exit code {return_code}"
|
||||||
|
else:
|
||||||
|
stage.status = "completed"
|
||||||
|
stage.error = None
|
||||||
|
|
||||||
|
worker_id = self._current_worker_id()
|
||||||
|
if self._worker_manager and worker_id:
|
||||||
|
label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
|
||||||
|
self._worker_manager.log_step(worker_id, label)
|
||||||
|
|
||||||
|
ctx.set_last_result_table(None, emitted)
|
||||||
|
ctx.set_last_items(emitted)
|
||||||
|
return stage
|
||||||
|
|
||||||
|
def _apply_selection_stage(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
token: str,
|
||||||
|
stage: PipelineStageResult,
|
||||||
|
piped_input: Any,
|
||||||
|
on_log: Optional[Callable[[str], None]],
|
||||||
|
) -> PipelineStageResult:
|
||||||
|
selection = self._parse_selection(token)
|
||||||
|
items = piped_input or []
|
||||||
|
if not isinstance(items, list):
|
||||||
|
items = list(items if isinstance(items, Sequence) else [items])
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
stage.status = "failed"
|
||||||
|
stage.error = "Selection requested but there is no upstream data"
|
||||||
|
return stage
|
||||||
|
|
||||||
|
if selection is None:
|
||||||
|
stage.emitted = list(items)
|
||||||
|
else:
|
||||||
|
zero_based = sorted(i - 1 for i in selection if i > 0)
|
||||||
|
stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]
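# e.g. a selection token that parses to {1, 3} keeps items[0] and items[2]
# from the upstream stage; out-of-range indices are silently dropped.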
|
||||||
|
|
||||||
|
if not stage.emitted:
|
||||||
|
stage.status = "failed"
|
||||||
|
stage.error = "Selection matched no rows"
|
||||||
|
return stage
|
||||||
|
|
||||||
|
ctx.set_last_items(stage.emitted)
|
||||||
|
ctx.set_last_result_table(None, stage.emitted)
|
||||||
|
stage.status = "completed"
|
||||||
|
if on_log:
|
||||||
|
on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
|
||||||
|
return stage
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Worker/session helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
|
||||||
|
manager = self._ensure_worker_manager()
|
||||||
|
if manager is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
|
||||||
|
tracked = manager.track_worker(
|
||||||
|
worker_id,
|
||||||
|
worker_type="pipeline",
|
||||||
|
title="Pipeline run",
|
||||||
|
description=pipeline_text,
|
||||||
|
pipe=pipeline_text,
|
||||||
|
)
|
||||||
|
if not tracked:
|
||||||
|
return None
|
||||||
|
|
||||||
|
manager.log_step(worker_id, "Pipeline started")
|
||||||
|
self._config["_current_worker_id"] = worker_id
|
||||||
|
return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)
|
||||||
|
|
||||||
|
def _ensure_worker_manager(self) -> Optional[WorkerManager]:
|
||||||
|
if self._worker_manager:
|
||||||
|
return self._worker_manager
|
||||||
|
library_root = get_local_storage_path(self._config)
|
||||||
|
if not library_root:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
|
||||||
|
self._config["_worker_manager"] = self._worker_manager
|
||||||
|
except Exception:
|
||||||
|
self._worker_manager = None
|
||||||
|
return self._worker_manager
|
||||||
|
|
||||||
|
def _current_worker_id(self) -> Optional[str]:
|
||||||
|
worker_id = self._config.get("_current_worker_id")
|
||||||
|
return str(worker_id) if worker_id else None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Parsing helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
@staticmethod
|
||||||
|
def _tokenize(pipeline_text: str) -> List[str]:
|
||||||
|
try:
|
||||||
|
return shlex.split(pipeline_text)
|
||||||
|
except ValueError:
|
||||||
|
return pipeline_text.split()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
|
||||||
|
stages: List[List[str]] = []
|
||||||
|
current: List[str] = []
|
||||||
|
for token in tokens:
|
||||||
|
if token == "|":
|
||||||
|
if current:
|
||||||
|
stages.append(current)
|
||||||
|
current = []
|
||||||
|
else:
|
||||||
|
current.append(token)
|
||||||
|
if current:
|
||||||
|
stages.append(current)
|
||||||
|
return stages
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_selection(token: str) -> Optional[Sequence[int]]:
|
||||||
|
if _parse_selection_syntax:
|
||||||
|
parsed = _parse_selection_syntax(token)
|
||||||
|
if parsed:
|
||||||
|
return sorted(parsed)
|
||||||
|
return None
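
The two static helpers above are what turn the raw pipeline string into per-stage argument lists. A minimal standalone sketch of that flow, using only the standard library (the selection syntax handled by _parse_selection_syntax is not reproduced here):

import shlex
from typing import List, Sequence

def tokenize(pipeline_text: str) -> List[str]:
    # Mirror of _tokenize: fall back to whitespace splitting on quoting errors.
    try:
        return shlex.split(pipeline_text)
    except ValueError:
        return pipeline_text.split()

def split_stages(tokens: Sequence[str]) -> List[List[str]]:
    # Mirror of _split_stages: "|" separates stages, empty stages are dropped.
    stages: List[List[str]] = []
    current: List[str] = []
    for token in tokens:
        if token == "|":
            if current:
                stages.append(current)
                current = []
        else:
            current.append(token)
    if current:
        stages.append(current)
    return stages

text = 'download-data "https://example.org/a" | add-tag music | add-file -storage local'
print(split_stages(tokenize(text)))
# [['download-data', 'https://example.org/a'], ['add-tag', 'music'], ['add-file', '-storage', 'local']]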
|
||||||
|
|
||||||
|
|
||||||
|
class _WorkerSession:
|
||||||
|
"""Minimal worker session wrapper for the TUI executor."""
|
||||||
|
|
||||||
|
def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
self._manager = manager
|
||||||
|
self.worker_id = worker_id
|
||||||
|
self._config = config
|
||||||
|
|
||||||
|
def finish(self, *, status: str, message: str) -> None:
|
||||||
|
try:
|
||||||
|
self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
|
||||||
|
self._manager.log_step(self.worker_id, f"Pipeline {status}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if self._config and self._config.get("_current_worker_id") == self.worker_id:
|
||||||
|
self._config.pop("_current_worker_id", None)
|
||||||
332
TUI/tui.py
Normal file
@@ -0,0 +1,332 @@
|
|||||||
|
"""Modern Textual UI for driving Medeia-Macina pipelines."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from textual import work
|
||||||
|
from textual.app import App, ComposeResult
|
||||||
|
from textual.binding import Binding
|
||||||
|
from textual.containers import Container, Horizontal, Vertical, VerticalScroll
|
||||||
|
from textual.widgets import (
|
||||||
|
Button,
|
||||||
|
DataTable,
|
||||||
|
Footer,
|
||||||
|
Header,
|
||||||
|
Input,
|
||||||
|
ListItem,
|
||||||
|
ListView,
|
||||||
|
Static,
|
||||||
|
TextArea,
|
||||||
|
Tree,
|
||||||
|
)
|
||||||
|
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
ROOT_DIR = BASE_DIR.parent
|
||||||
|
for path in (BASE_DIR, ROOT_DIR):
|
||||||
|
str_path = str(path)
|
||||||
|
if str_path not in sys.path:
|
||||||
|
sys.path.insert(0, str_path)
|
||||||
|
|
||||||
|
from menu_actions import ( # type: ignore # noqa: E402
|
||||||
|
PIPELINE_PRESETS,
|
||||||
|
PipelinePreset,
|
||||||
|
build_metadata_snapshot,
|
||||||
|
summarize_result,
|
||||||
|
)
|
||||||
|
from pipeline_runner import PipelineExecutor, PipelineRunResult # type: ignore # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
class PresetListItem(ListItem):
|
||||||
|
"""List entry that stores its pipeline preset."""
|
||||||
|
|
||||||
|
def __init__(self, preset: PipelinePreset) -> None:
|
||||||
|
super().__init__(
|
||||||
|
Static(
|
||||||
|
f"[b]{preset.label}[/b]\n[pale_green4]{preset.description}[/pale_green4]",
|
||||||
|
classes="preset-entry",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.preset = preset
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineHubApp(App):
|
||||||
|
"""Textual front-end that executes cmdlet pipelines inline."""
|
||||||
|
|
||||||
|
CSS_PATH = "tui.tcss"
|
||||||
|
BINDINGS = [
|
||||||
|
Binding("ctrl+enter", "run_pipeline", "Run Pipeline"),
|
||||||
|
Binding("f5", "refresh_workers", "Refresh Workers"),
|
||||||
|
Binding("ctrl+l", "focus_command", "Focus Input", show=False),
|
||||||
|
]
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self.executor = PipelineExecutor()
|
||||||
|
self.result_items: List[Any] = []
|
||||||
|
self.log_lines: List[str] = []
|
||||||
|
self.command_input: Optional[Input] = None
|
||||||
|
self.log_output: Optional[TextArea] = None
|
||||||
|
self.results_table: Optional[DataTable] = None
|
||||||
|
self.metadata_tree: Optional[Tree] = None
|
||||||
|
self.worker_table: Optional[DataTable] = None
|
||||||
|
self.preset_list: Optional[ListView] = None
|
||||||
|
self.status_panel: Optional[Static] = None
|
||||||
|
self._pipeline_running = False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Layout
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def compose(self) -> ComposeResult: # noqa: D401 - Textual compose hook
|
||||||
|
yield Header(show_clock=True)
|
||||||
|
with Container(id="app-shell"):
|
||||||
|
with Horizontal(id="command-row"):
|
||||||
|
self.command_input = Input(
|
||||||
|
placeholder='download-data "<url>" | merge-file | add-tag | add-file -storage local',
|
||||||
|
id="pipeline-input",
|
||||||
|
)
|
||||||
|
yield self.command_input
|
||||||
|
yield Button("Run", id="run-button", variant="primary")
|
||||||
|
self.status_panel = Static("Idle", id="status-panel")
|
||||||
|
yield self.status_panel
|
||||||
|
with Horizontal(id="content-row"):
|
||||||
|
with VerticalScroll(id="left-pane"):
|
||||||
|
yield Static("Pipeline Presets", classes="section-title")
|
||||||
|
self.preset_list = ListView(
|
||||||
|
*(PresetListItem(preset) for preset in PIPELINE_PRESETS),
|
||||||
|
id="preset-list",
|
||||||
|
)
|
||||||
|
yield self.preset_list
|
||||||
|
yield Static("Logs", classes="section-title")
|
||||||
|
self.log_output = TextArea(id="log-output", read_only=True)
|
||||||
|
yield self.log_output
|
||||||
|
yield Static("Workers", classes="section-title")
|
||||||
|
self.worker_table = DataTable(id="workers-table")
|
||||||
|
yield self.worker_table
|
||||||
|
with Vertical(id="right-pane"):
|
||||||
|
yield Static("Results", classes="section-title")
|
||||||
|
self.results_table = DataTable(id="results-table")
|
||||||
|
yield self.results_table
|
||||||
|
yield Static("Metadata", classes="section-title")
|
||||||
|
self.metadata_tree = Tree("Run a pipeline", id="metadata-tree")
|
||||||
|
yield self.metadata_tree
|
||||||
|
yield Footer()
|
||||||
|
|
||||||
|
def on_mount(self) -> None:
|
||||||
|
if self.results_table:
|
||||||
|
self.results_table.add_columns("Row", "Title", "Source", "File")
|
||||||
|
if self.worker_table:
|
||||||
|
self.worker_table.add_columns("ID", "Type", "Status", "Details")
|
||||||
|
if self.executor.worker_manager:
|
||||||
|
self.set_interval(2.0, self.refresh_workers)
|
||||||
|
self.refresh_workers()
|
||||||
|
if self.command_input:
|
||||||
|
self.command_input.focus()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Actions
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def action_focus_command(self) -> None:
|
||||||
|
if self.command_input:
|
||||||
|
self.command_input.focus()
|
||||||
|
|
||||||
|
def action_run_pipeline(self) -> None:
|
||||||
|
if self._pipeline_running:
|
||||||
|
self.notify("Pipeline already running", severity="warning", timeout=3)
|
||||||
|
return
|
||||||
|
if not self.command_input:
|
||||||
|
return
|
||||||
|
pipeline_text = self.command_input.value.strip()
|
||||||
|
if not pipeline_text:
|
||||||
|
self.notify("Enter a pipeline to run", severity="warning", timeout=3)
|
||||||
|
return
|
||||||
|
|
||||||
|
self._pipeline_running = True
|
||||||
|
self._set_status("Running…", level="info")
|
||||||
|
self._clear_log()
|
||||||
|
self._append_log_line(f"$ {pipeline_text}")
|
||||||
|
self._clear_results()
|
||||||
|
self._run_pipeline_background(pipeline_text)
|
||||||
|
|
||||||
|
def action_refresh_workers(self) -> None:
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Event handlers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def on_button_pressed(self, event: Button.Pressed) -> None:
|
||||||
|
if event.button.id == "run-button":
|
||||||
|
self.action_run_pipeline()
|
||||||
|
|
||||||
|
def on_input_submitted(self, event: Input.Submitted) -> None:
|
||||||
|
if event.input.id == "pipeline-input":
|
||||||
|
self.action_run_pipeline()
|
||||||
|
|
||||||
|
def on_list_view_selected(self, event: ListView.Selected) -> None:
|
||||||
|
if isinstance(event.item, PresetListItem) and self.command_input:
|
||||||
|
self.command_input.value = event.item.preset.pipeline
|
||||||
|
self.notify(f"Loaded preset: {event.item.preset.label}", timeout=2)
|
||||||
|
event.stop()
|
||||||
|
|
||||||
|
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
|
||||||
|
if not self.results_table or event.control is not self.results_table:
|
||||||
|
return
|
||||||
|
index = event.cursor_row
|
||||||
|
if 0 <= index < len(self.result_items):
|
||||||
|
self._display_metadata(self.result_items[index])
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Pipeline execution helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
@work(exclusive=True, thread=True)
|
||||||
|
def _run_pipeline_background(self, pipeline_text: str) -> None:
|
||||||
|
run_result = self.executor.run_pipeline(pipeline_text, on_log=self._log_from_worker)
|
||||||
|
self.call_from_thread(self._on_pipeline_finished, run_result)
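# Note: because @work(..., thread=True) runs this method in a worker thread,
# the finished result is handed back to the UI via call_from_thread instead of
# being applied to widgets directly.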
|
||||||
|
|
||||||
|
def _on_pipeline_finished(self, run_result: PipelineRunResult) -> None:
|
||||||
|
self._pipeline_running = False
|
||||||
|
status_level = "success" if run_result.success else "error"
|
||||||
|
status_text = "Completed" if run_result.success else "Failed"
|
||||||
|
self._set_status(status_text, level=status_level)
|
||||||
|
|
||||||
|
if not run_result.success:
|
||||||
|
self.notify(run_result.error or "Pipeline failed", severity="error", timeout=6)
|
||||||
|
else:
|
||||||
|
self.notify("Pipeline completed", timeout=3)
|
||||||
|
|
||||||
|
if run_result.stdout.strip():
|
||||||
|
self._append_log_line("stdout:")
|
||||||
|
self._append_block(run_result.stdout)
|
||||||
|
if run_result.stderr.strip():
|
||||||
|
self._append_log_line("stderr:")
|
||||||
|
self._append_block(run_result.stderr)
|
||||||
|
|
||||||
|
for stage in run_result.stages:
|
||||||
|
summary = f"[{stage.status}] {stage.name} -> {len(stage.emitted)} item(s)"
|
||||||
|
if stage.error:
|
||||||
|
summary += f" ({stage.error})"
|
||||||
|
self._append_log_line(summary)
|
||||||
|
|
||||||
|
emitted = run_result.emitted
|
||||||
|
if isinstance(emitted, list):
|
||||||
|
self.result_items = emitted
|
||||||
|
elif emitted:
|
||||||
|
self.result_items = [emitted]
|
||||||
|
else:
|
||||||
|
self.result_items = []
|
||||||
|
|
||||||
|
self._populate_results_table()
|
||||||
|
self.refresh_workers()
|
||||||
|
|
||||||
|
def _log_from_worker(self, message: str) -> None:
|
||||||
|
self.call_from_thread(self._append_log_line, message)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# UI helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _populate_results_table(self) -> None:
|
||||||
|
if not self.results_table:
|
||||||
|
return
|
||||||
|
self.results_table.clear()
|
||||||
|
if not self.result_items:
|
||||||
|
self.results_table.add_row("—", "No results", "", "")
|
||||||
|
return
|
||||||
|
for idx, item in enumerate(self.result_items, start=1):
|
||||||
|
if isinstance(item, dict):
|
||||||
|
title = summarize_result(item)
|
||||||
|
source = item.get("source") or item.get("cmdlet_name") or item.get("cmdlet") or "—"
|
||||||
|
file_path = item.get("file_path") or item.get("path") or "—"
|
||||||
|
else:
|
||||||
|
title = str(item)
|
||||||
|
source = "—"
|
||||||
|
file_path = "—"
|
||||||
|
self.results_table.add_row(str(idx), title, source, file_path, key=str(idx - 1))
|
||||||
|
|
||||||
|
def _display_metadata(self, item: Any) -> None:
|
||||||
|
if not self.metadata_tree:
|
||||||
|
return
|
||||||
|
root = self.metadata_tree.root
|
||||||
|
root.label = "Metadata"
|
||||||
|
root.remove_children()
|
||||||
|
|
||||||
|
payload: Dict[str, Any]
|
||||||
|
if isinstance(item, dict):
|
||||||
|
file_path = item.get("file_path") or item.get("path")
|
||||||
|
if file_path:
|
||||||
|
payload = build_metadata_snapshot(Path(file_path))
|
||||||
|
else:
|
||||||
|
payload = item
|
||||||
|
else:
|
||||||
|
payload = {"value": str(item)}
|
||||||
|
|
||||||
|
self._populate_tree_node(root, payload)
|
||||||
|
root.expand_all()
|
||||||
|
|
||||||
|
def _populate_tree_node(self, node, data: Any) -> None:
|
||||||
|
if isinstance(data, dict):
|
||||||
|
for key, value in data.items():
|
||||||
|
child = node.add(f"[b]{key}[/b]")
|
||||||
|
self._populate_tree_node(child, value)
|
||||||
|
elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
|
||||||
|
for idx, value in enumerate(data):
|
||||||
|
child = node.add(f"[{idx}]")
|
||||||
|
self._populate_tree_node(child, value)
|
||||||
|
else:
|
||||||
|
node.add(str(data))
|
||||||
|
|
||||||
|
def _clear_log(self) -> None:
|
||||||
|
self.log_lines = []
|
||||||
|
if self.log_output:
|
||||||
|
self.log_output.load_text("")
|
||||||
|
|
||||||
|
def _append_log_line(self, line: str) -> None:
|
||||||
|
self.log_lines.append(line)
|
||||||
|
if len(self.log_lines) > 500:
|
||||||
|
self.log_lines = self.log_lines[-500:]
|
||||||
|
if self.log_output:
|
||||||
|
self.log_output.load_text("\n".join(self.log_lines))
|
||||||
|
|
||||||
|
def _append_block(self, text: str) -> None:
|
||||||
|
for line in text.strip().splitlines():
|
||||||
|
self._append_log_line(f" {line}")
|
||||||
|
|
||||||
|
def _clear_results(self) -> None:
|
||||||
|
self.result_items = []
|
||||||
|
if self.results_table:
|
||||||
|
self.results_table.clear()
|
||||||
|
if self.metadata_tree:
|
||||||
|
self.metadata_tree.root.label = "Awaiting results"
|
||||||
|
self.metadata_tree.root.remove_children()
|
||||||
|
|
||||||
|
def _set_status(self, message: str, *, level: str = "info") -> None:
|
||||||
|
if not self.status_panel:
|
||||||
|
return
|
||||||
|
for css in ("status-info", "status-success", "status-error"):
|
||||||
|
self.status_panel.remove_class(css)
|
||||||
|
css_class = f"status-{level if level in {'success', 'error'} else 'info'}"
|
||||||
|
self.status_panel.add_class(css_class)
|
||||||
|
self.status_panel.update(message)
|
||||||
|
|
||||||
|
def refresh_workers(self) -> None:
|
||||||
|
if not self.worker_table:
|
||||||
|
return
|
||||||
|
manager = self.executor.worker_manager
|
||||||
|
self.worker_table.clear()
|
||||||
|
if manager is None:
|
||||||
|
self.worker_table.add_row("—", "—", "—", "Worker manager unavailable")
|
||||||
|
return
|
||||||
|
workers = manager.get_active_workers()
|
||||||
|
if not workers:
|
||||||
|
self.worker_table.add_row("—", "—", "—", "No active workers")
|
||||||
|
return
|
||||||
|
for worker in workers:
|
||||||
|
worker_id = str(worker.get("worker_id") or worker.get("id") or "?")[:8]
|
||||||
|
worker_type = str(worker.get("worker_type") or worker.get("type") or "?")
|
||||||
|
status = str(worker.get("status") or worker.get("result") or "running")
|
||||||
|
details = worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
|
||||||
|
self.worker_table.add_row(worker_id, worker_type, status, str(details)[:80])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
PipelineHubApp().run()
|
||||||
100
TUI/tui.tcss
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
#app-shell {
|
||||||
|
width: 100%;
|
||||||
|
height: 100%;
|
||||||
|
padding: 1 2;
|
||||||
|
background: $surface;
|
||||||
|
layout: vertical;
|
||||||
|
}
|
||||||
|
|
||||||
|
#command-row {
|
||||||
|
width: 100%;
|
||||||
|
height: auto;
|
||||||
|
background: $boost;
|
||||||
|
padding: 1;
|
||||||
|
border: round $primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pipeline-input {
|
||||||
|
width: 1fr;
|
||||||
|
min-height: 3;
|
||||||
|
padding: 0 1;
|
||||||
|
margin-right: 1;
|
||||||
|
background: $surface;
|
||||||
|
color: $text;
|
||||||
|
border: round $primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pipeline-input:focus {
|
||||||
|
border: double $primary;
|
||||||
|
background: $surface;
|
||||||
|
}
|
||||||
|
|
||||||
|
#status-panel {
|
||||||
|
min-width: 20;
|
||||||
|
text-style: bold;
|
||||||
|
content-align: center middle;
|
||||||
|
padding: 0 1;
|
||||||
|
border: solid $panel-darken-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#content-row {
|
||||||
|
width: 100%;
|
||||||
|
height: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#left-pane,
|
||||||
|
#right-pane {
|
||||||
|
width: 1fr;
|
||||||
|
height: 100%;
|
||||||
|
padding: 1;
|
||||||
|
background: $panel;
|
||||||
|
border: round $panel-darken-2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#left-pane {
|
||||||
|
max-width: 48;
|
||||||
|
}
|
||||||
|
|
||||||
|
.section-title {
|
||||||
|
text-style: bold;
|
||||||
|
color: $text-muted;
|
||||||
|
margin-top: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.preset-entry {
|
||||||
|
padding: 1;
|
||||||
|
border: tall $panel-darken-1;
|
||||||
|
margin-bottom: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#log-output {
|
||||||
|
height: 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
#workers-table {
|
||||||
|
height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
#results-table {
|
||||||
|
height: 1fr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#metadata-tree {
|
||||||
|
height: 1fr;
|
||||||
|
border: round $panel-darken-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-info {
|
||||||
|
background: $boost;
|
||||||
|
color: $text;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-success {
|
||||||
|
background: $success 20%;
|
||||||
|
color: $success;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-error {
|
||||||
|
background: $error 20%;
|
||||||
|
color: $error;
|
||||||
|
}
|
||||||
139
cmdlets/__init__.py
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Callable, Dict, Iterable, Sequence
|
||||||
|
from importlib import import_module as _import_module
|
||||||
|
|
||||||
|
# A cmdlet is a callable taking (result, args, config) -> int
|
||||||
|
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
|
||||||
|
|
||||||
|
# Registry of command-name -> cmdlet function
|
||||||
|
REGISTRY: Dict[str, Cmdlet] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def register(names: Iterable[str]):
|
||||||
|
"""Decorator to register a function under one or more command names.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
@register(["add-tag", "add-tags"])
|
||||||
|
def _run(result, args, config) -> int: ...
|
||||||
|
"""
|
||||||
|
def _wrap(fn: Cmdlet) -> Cmdlet:
|
||||||
|
for name in names:
|
||||||
|
REGISTRY[name.replace('_', '-').lower()] = fn
|
||||||
|
return fn
|
||||||
|
return _wrap
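
As the docstring above shows, register() is the low-level hook. A hypothetical cmdlet module using it directly might look like the sketch below; the module name, command names, and body are made up for illustration, and only the (result, args, config) -> int contract comes from the code above.

# Hypothetical module cmdlets/echo_title.py -- illustration only.
from typing import Any, Dict, Sequence

from cmdlets import register

@register(["echo-title", "echo_title"])   # both spellings normalise to "echo-title"
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    title = result.get("title") if isinstance(result, dict) else str(result)
    print(title or "<no title>")
    return 0   # 0 signals success to the pipeline runner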
|
||||||
|
|
||||||
|
|
||||||
|
class AutoRegister:
|
||||||
|
"""Decorator that automatically registers a cmdlet function using CMDLET.aliases.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="delete-file",
|
||||||
|
aliases=["del", "del-file"],
|
||||||
|
...
|
||||||
|
)
|
||||||
|
|
||||||
|
@AutoRegister(CMDLET)
|
||||||
|
def _run(result, args, config) -> int:
|
||||||
|
...
|
||||||
|
|
||||||
|
Registers the cmdlet under:
|
||||||
|
- Its main name from CMDLET.name
|
||||||
|
- All aliases from CMDLET.aliases
|
||||||
|
|
||||||
|
This allows the help display to show: "cmd: delete-file | aliases: del, del-file"
|
||||||
|
"""
|
||||||
|
def __init__(self, cmdlet):
|
||||||
|
self.cmdlet = cmdlet
|
||||||
|
|
||||||
|
def __call__(self, fn: Cmdlet) -> Cmdlet:
|
||||||
|
"""Register fn for the main name and all aliases in cmdlet."""
|
||||||
|
normalized_name = None
|
||||||
|
|
||||||
|
# Register for main name first
|
||||||
|
if hasattr(self.cmdlet, 'name') and self.cmdlet.name:
|
||||||
|
normalized_name = self.cmdlet.name.replace('_', '-').lower()
|
||||||
|
REGISTRY[normalized_name] = fn
|
||||||
|
|
||||||
|
# Register for all aliases
|
||||||
|
if hasattr(self.cmdlet, 'aliases') and self.cmdlet.aliases:
|
||||||
|
for alias in self.cmdlet.aliases:
|
||||||
|
normalized_alias = alias.replace('_', '-').lower()
|
||||||
|
# Always register (aliases are separate from main name)
|
||||||
|
REGISTRY[normalized_alias] = fn
|
||||||
|
|
||||||
|
return fn
|
||||||
|
|
||||||
|
|
||||||
|
def get(cmd_name: str) -> Cmdlet | None:
|
||||||
|
return REGISTRY.get(cmd_name.replace('_', '-').lower())
|
||||||
|
|
||||||
|
|
||||||
|
def format_cmd_help(cmdlet) -> str:
|
||||||
|
"""Format a cmdlet for help display showing cmd:name and aliases.
|
||||||
|
|
||||||
|
Example output: "cmd: delete-file | aliases: del, del-file"
|
||||||
|
"""
|
||||||
|
if not hasattr(cmdlet, 'name'):
|
||||||
|
return str(cmdlet)
|
||||||
|
|
||||||
|
cmd_str = f"cmd: {cmdlet.name}"
|
||||||
|
|
||||||
|
if hasattr(cmdlet, 'aliases') and cmdlet.aliases:
|
||||||
|
aliases_str = ", ".join(cmdlet.aliases)
|
||||||
|
cmd_str += f" | aliases: {aliases_str}"
|
||||||
|
|
||||||
|
return cmd_str
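
A quick self-contained check of the normalisation and help-formatting rules above; SimpleNamespace stands in for a real cmdlet metadata object, and only the name and aliases attributes are assumed.

from types import SimpleNamespace

import cmdlets

meta = SimpleNamespace(name="delete-file", aliases=["del", "del-file"])
print(cmdlets.format_cmd_help(meta))   # cmd: delete-file | aliases: del, del-file

@cmdlets.AutoRegister(meta)
def _noop(result, args, config) -> int:
    return 0

# Underscores and case differences collapse to the same registry entry.
assert cmdlets.get("Delete_File") is cmdlets.get("del") is _noop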
|
||||||
|
|
||||||
|
|
||||||
|
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
|
||||||
|
import os
|
||||||
|
cmdlet_dir = os.path.dirname(__file__)
|
||||||
|
for filename in os.listdir(cmdlet_dir):
|
||||||
|
if (
|
||||||
|
filename.endswith(".py")
|
||||||
|
and not filename.startswith("_")
|
||||||
|
and filename != "__init__.py"
|
||||||
|
):
|
||||||
|
mod_name = filename[:-3]
|
||||||
|
try:
|
||||||
|
module = _import_module(f".{mod_name}", __name__)
|
||||||
|
|
||||||
|
# Auto-register based on CMDLET object with exec function
|
||||||
|
# This allows cmdlets to be fully self-contained in the CMDLET object
|
||||||
|
if hasattr(module, 'CMDLET'):
|
||||||
|
cmdlet_obj = module.CMDLET
|
||||||
|
|
||||||
|
# Get the execution function from the CMDLET object
|
||||||
|
run_fn = getattr(cmdlet_obj, 'exec', None)
|
||||||
|
|
||||||
|
if callable(run_fn):
|
||||||
|
# Register main name
|
||||||
|
if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
|
||||||
|
normalized_name = cmdlet_obj.name.replace('_', '-').lower()
|
||||||
|
REGISTRY[normalized_name] = run_fn
|
||||||
|
|
||||||
|
# Register all aliases
|
||||||
|
if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
|
||||||
|
for alias in cmdlet_obj.aliases:
|
||||||
|
normalized_alias = alias.replace('_', '-').lower()
|
||||||
|
REGISTRY[normalized_alias] = run_fn
|
||||||
|
except Exception:
|
||||||
|
continue
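
The loop above discovers any module in this directory that exposes a CMDLET object with an exec callable. A minimal sketch of such a module follows; the file name, command name, and behaviour are hypothetical, and only the CMDLET/name/aliases/exec shape is taken from the loop above.

# Hypothetical cmdlets/print_hash.py -- shape inferred from the discovery loop.
from types import SimpleNamespace
from typing import Any, Dict, Sequence

def _exec(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    file_hash = result.get("file_hash") if isinstance(result, dict) else None
    print(file_hash or "<no hash>")
    return 0

CMDLET = SimpleNamespace(
    name="print-hash",
    aliases=["ph"],
    exec=_exec,   # the loop registers this under "print-hash" and "ph"
)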
|
||||||
|
|
||||||
|
# Import root-level modules that also register cmdlets
|
||||||
|
# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py
|
||||||
|
# Use search-file -provider libgen, -provider soulseek, or -provider debrid instead
|
||||||
|
for _root_mod in ("select_cmdlet",):
|
||||||
|
try:
|
||||||
|
_import_module(_root_mod)
|
||||||
|
except Exception:
|
||||||
|
# Allow missing optional modules
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Also import helper modules that register cmdlets
|
||||||
|
try:
|
||||||
|
import helper.alldebrid as _alldebrid
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
1229
cmdlets/_shared.py
Normal file
File diff suppressed because it is too large
910
cmdlets/add_file.py
Normal file
@@ -0,0 +1,910 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
|
||||||
|
from collections.abc import Iterable as IterableABC
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from helper.logger import log, debug
|
||||||
|
from helper.file_storage import FileStorage
|
||||||
|
from ._shared import (
|
||||||
|
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
|
||||||
|
extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
|
||||||
|
merge_sequences, extract_relationships, extract_duration
|
||||||
|
)
|
||||||
|
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
|
||||||
|
from helper.utils import sha256_file
|
||||||
|
from metadata import embed_metadata_in_file
|
||||||
|
|
||||||
|
# Use official Hydrus supported filetypes from hydrus_wrapper
|
||||||
|
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
|
||||||
|
|
||||||
|
# Initialize file storage system
|
||||||
|
storage = FileStorage()
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_media_kind_from_suffix(media_path: Path) -> str:
|
||||||
|
suffix = media_path.suffix.lower()
|
||||||
|
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
|
||||||
|
return 'audio'
|
||||||
|
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||||
|
return 'video'
|
||||||
|
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
|
||||||
|
return 'image'
|
||||||
|
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
|
||||||
|
return 'document'
|
||||||
|
return 'other'
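# Illustrative examples: "track.flac" -> "audio", "clip.webm" -> "video",
# "scan.pdf" -> "document", unlisted suffixes such as ".zip" -> "other".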
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_media_kind(result: Any, media_path: Path) -> str:
|
||||||
|
if isinstance(result, models.PipeObject):
|
||||||
|
if getattr(result, 'media_kind', None):
|
||||||
|
return str(result.media_kind)
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
media_kind = result.get('media_kind')
|
||||||
|
if media_kind:
|
||||||
|
return str(media_kind)
|
||||||
|
metadata = result.get('metadata')
|
||||||
|
if isinstance(metadata, dict) and metadata.get('media_kind'):
|
||||||
|
return str(metadata['media_kind'])
|
||||||
|
return _guess_media_kind_from_suffix(media_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
|
||||||
|
# For local origin, try to read from local database first
|
||||||
|
if origin and origin.lower() == "local" and config:
|
||||||
|
try:
|
||||||
|
from helper.local_library import LocalLibraryDB
|
||||||
|
from config import get_local_storage_path
|
||||||
|
|
||||||
|
try:
|
||||||
|
db_root = get_local_storage_path(config)
|
||||||
|
except Exception:
|
||||||
|
db_root = None
|
||||||
|
|
||||||
|
if db_root:
|
||||||
|
try:
|
||||||
|
db = LocalLibraryDB(Path(db_root))
|
||||||
|
try:
|
||||||
|
# Get tags and metadata from database
|
||||||
|
tags = db.get_tags(media_path) or []
|
||||||
|
metadata = db.get_metadata(media_path) or {}
|
||||||
|
known_urls = metadata.get("known_urls") or []
|
||||||
|
file_hash = metadata.get("hash")
|
||||||
|
|
||||||
|
if tags or known_urls or file_hash:
|
||||||
|
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
|
||||||
|
return None, file_hash, tags, known_urls
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fall back to sidecar file lookup
|
||||||
|
try:
|
||||||
|
sidecar_path = find_sidecar(media_path)
|
||||||
|
except Exception:
|
||||||
|
sidecar_path = None
|
||||||
|
if not sidecar_path or not sidecar_path.exists():
|
||||||
|
return None, None, [], []
|
||||||
|
try:
|
||||||
|
hash_value, tags, known_urls = read_sidecar(sidecar_path)
|
||||||
|
return sidecar_path, hash_value, tags or [], known_urls or []
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
|
||||||
|
return sidecar_path, None, [], []
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
|
||||||
|
candidate = None
|
||||||
|
if isinstance(result, models.PipeObject):
|
||||||
|
candidate = result.file_hash
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
candidate = result.get('file_hash') or result.get('hash')
|
||||||
|
candidate = candidate or fallback_hash
|
||||||
|
if candidate:
|
||||||
|
return str(candidate)
|
||||||
|
try:
|
||||||
|
return sha256_file(file_path)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
|
||||||
|
targets = [
|
||||||
|
media_path.parent / (media_path.name + '.metadata'),
|
||||||
|
media_path.parent / (media_path.name + '.notes'),
|
||||||
|
media_path.parent / (media_path.name + '.tags'),
|
||||||
|
media_path.parent / (media_path.name + '.tags.txt'),
|
||||||
|
]
|
||||||
|
targets.extend(extra_paths)
|
||||||
|
for target in targets:
|
||||||
|
if not target:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
path_obj = Path(target)
|
||||||
|
if path_obj.exists():
|
||||||
|
path_obj.unlink()
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_local_metadata(
|
||||||
|
library_root: Path,
|
||||||
|
dest_path: Path,
|
||||||
|
tags: list[str],
|
||||||
|
known_urls: list[str],
|
||||||
|
file_hash: Optional[str],
|
||||||
|
relationships: Optional[Dict[str, Any]],
|
||||||
|
duration: Optional[float],
|
||||||
|
media_kind: str,
|
||||||
|
) -> None:
|
||||||
|
payload = {
|
||||||
|
'hash': file_hash,
|
||||||
|
'known_urls': known_urls,
|
||||||
|
'relationships': relationships or [],
|
||||||
|
'duration': duration,
|
||||||
|
'size': None,
|
||||||
|
'ext': dest_path.suffix.lower(),
|
||||||
|
'media_type': media_kind,
|
||||||
|
'media_kind': media_kind,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
payload['size'] = dest_path.stat().st_size
|
||||||
|
except OSError:
|
||||||
|
payload['size'] = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
|
||||||
|
db_path = Path(library_root) / ".downlow_library.db"
|
||||||
|
debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
|
||||||
|
debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
|
||||||
|
debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
|
||||||
|
|
||||||
|
with LocalLibraryDB(library_root) as db:
|
||||||
|
# Save metadata FIRST to ensure file entry is created in DB
|
||||||
|
if any(payload.values()):
|
||||||
|
debug(f"[_persist_local_metadata] Saving metadata payload first")
|
||||||
|
try:
|
||||||
|
db.save_metadata(dest_path, payload)
|
||||||
|
debug(f"[_persist_local_metadata] ✅ Metadata saved")
|
||||||
|
except Exception as meta_exc:
|
||||||
|
log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Save tags to DB synchronously in same transaction
|
||||||
|
# For local storage, DB is the primary source of truth
|
||||||
|
if tags:
|
||||||
|
try:
|
||||||
|
debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
|
||||||
|
db.save_tags(dest_path, tags)
|
||||||
|
debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
|
||||||
|
except Exception as tag_exc:
|
||||||
|
log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
|
||||||
|
raise
|
||||||
|
|
||||||
|
# NOTE: Sidecar files are intentionally NOT created for local storage
|
||||||
|
# Local storage uses database as primary source, not sidecar files
|
||||||
|
|
||||||
|
debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
log(traceback.format_exc(), file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
|
||||||
|
"""Transfer a file to local storage and return (exit_code, destination_path).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
media_path: Path to source file
|
||||||
|
destination_root: Destination directory
|
||||||
|
result: Result object with metadata
|
||||||
|
config: Configuration dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (exit_code, destination_path)
|
||||||
|
- exit_code: 0 on success, 1 on failure
|
||||||
|
- destination_path: Path to moved file on success, None on failure
|
||||||
|
"""
|
||||||
|
destination_root = destination_root.expanduser()
|
||||||
|
try:
|
||||||
|
destination_root.mkdir(parents=True, exist_ok=True)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
|
||||||
|
return 1, None
|
||||||
|
|
||||||
|
|
||||||
|
tags_from_result = extract_tags_from_result(result)
|
||||||
|
urls_from_result = extract_known_urls_from_result(result)
|
||||||
|
# Get origin from result if available
|
||||||
|
result_origin = None
|
||||||
|
if hasattr(result, "origin"):
|
||||||
|
result_origin = result.origin
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
result_origin = result.get("origin") or result.get("source")
|
||||||
|
sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
|
||||||
|
|
||||||
|
# Normalize all title tags to use spaces instead of underscores BEFORE merging
|
||||||
|
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
|
||||||
|
def normalize_title_tag(tag: str) -> str:
|
||||||
|
"""Normalize a title tag by replacing underscores with spaces."""
|
||||||
|
if str(tag).strip().lower().startswith("title:"):
|
||||||
|
parts = tag.split(":", 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
value = parts[1].replace("_", " ").strip()
|
||||||
|
return f"title:{value}"
|
||||||
|
return tag
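# Illustrative: "title:Radiohead_-_Creep" -> "title:Radiohead - Creep";
# non-title tags such as "artist:Radiohead" pass through unchanged.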
|
||||||
|
|
||||||
|
tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
|
||||||
|
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
|
||||||
|
|
||||||
|
# Merge tags carefully: if URL has title tag, don't include sidecar title tags
|
||||||
|
# This prevents duplicate title: tags when URL provides a title
|
||||||
|
has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
|
||||||
|
if has_url_title:
|
||||||
|
# URL has a title, filter out any sidecar title tags to avoid duplication
|
||||||
|
sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
|
||||||
|
merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
|
||||||
|
else:
|
||||||
|
# No URL title, use all sidecar tags
|
||||||
|
merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
|
||||||
|
|
||||||
|
merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
|
||||||
|
relationships = extract_relationships(result)
|
||||||
|
duration = extract_duration(result)
|
||||||
|
|
||||||
|
try:
|
||||||
|
dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
|
||||||
|
return 1, None
|
||||||
|
|
||||||
|
dest_path = Path(dest_file)
|
||||||
|
file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
|
||||||
|
media_kind = _resolve_media_kind(result, dest_path)
|
||||||
|
|
||||||
|
# Ensure only ONE title tag that matches the actual filename
|
||||||
|
# Remove all existing title tags and add one based on the saved filename
|
||||||
|
merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
|
||||||
|
filename_title = dest_path.stem.replace("_", " ").strip()
|
||||||
|
if filename_title:
|
||||||
|
merged_tags_no_titles.insert(0, f"title:{filename_title}")
|
||||||
|
|
||||||
|
_persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
|
||||||
|
_cleanup_sidecar_files(media_path, sidecar_path)
|
||||||
|
debug(f"✅ Moved to local library: {dest_path}")
|
||||||
|
return 0, dest_path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Upload/copy a file to specified location.
|
||||||
|
|
||||||
|
Returns 0 on success, non-zero on failure.
|
||||||
|
"""
|
||||||
|
import sys # For stderr output
|
||||||
|
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
debug("Starting add-file cmdlet")
|
||||||
|
|
||||||
|
# Handle list of results (from piped commands that emit multiple items)
|
||||||
|
if isinstance(result, list):
|
||||||
|
debug(f"Processing {len(result)} piped files")
|
||||||
|
success_count = 0
|
||||||
|
for item in result:
|
||||||
|
exit_code = _run(item, _args, config)
|
||||||
|
if exit_code == 0:
|
||||||
|
success_count += 1
|
||||||
|
return 0 if success_count > 0 else 1
|
||||||
|
|
||||||
|
# Parse arguments using CMDLET spec
|
||||||
|
parsed = parse_cmdlet_args(_args, CMDLET)
|
||||||
|
location: Optional[str] = None
|
||||||
|
provider_name: Optional[str] = None
|
||||||
|
delete_after_upload = False
|
||||||
|
|
||||||
|
# Check if -path argument was provided to use direct file path instead of piped result
|
||||||
|
path_arg = parsed.get("path")
|
||||||
|
if path_arg:
|
||||||
|
# Create a pseudo-result object from the file path
|
||||||
|
media_path = Path(str(path_arg).strip())
|
||||||
|
if not media_path.exists():
|
||||||
|
log(f"❌ File not found: {media_path}")
|
||||||
|
return 1
|
||||||
|
# Create result dict with the file path and origin 'wild' for direct path inputs
|
||||||
|
result = {"target": str(media_path), "origin": "wild"}
|
||||||
|
log(f"Using direct file path: {media_path}")
|
||||||
|
|
||||||
|
# Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
|
||||||
|
location = parsed.get("storage")
|
||||||
|
if location:
|
||||||
|
location = str(location).lower().strip()
|
||||||
|
|
||||||
|
# Get file provider from parsed args
|
||||||
|
provider_name = parsed.get("provider")
|
||||||
|
if provider_name:
|
||||||
|
provider_name = str(provider_name).lower().strip()
|
||||||
|
|
||||||
|
# Check for delete flag (presence in parsed dict means it was provided)
|
||||||
|
delete_after_upload = "delete" in parsed
|
||||||
|
|
||||||
|
# Either storage or provider must be specified, but not both
|
||||||
|
if location is None and provider_name is None:
|
||||||
|
log("Either -storage or -provider must be specified")
|
||||||
|
log(" -storage options: 'hydrus', 'local', or a directory path")
|
||||||
|
log(" -provider options: '0x0'")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if location is not None and provider_name is not None:
|
||||||
|
log("❌ Cannot specify both -storage and -provider")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Validate location (storage backends)
|
||||||
|
is_valid_location = False
|
||||||
|
if location is not None:
|
||||||
|
valid_locations = {'hydrus', 'local'}
|
||||||
|
is_valid_location = location in valid_locations
|
||||||
|
is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
|
||||||
|
|
||||||
|
if location is not None and not (is_valid_location or is_local_path):
|
||||||
|
log(f"❌ Invalid location: {location}")
|
||||||
|
log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Extract tags/known URLs from pipeline objects if available
|
||||||
|
pipe_object_tags = extract_tags_from_result(result)
|
||||||
|
if pipe_object_tags:
|
||||||
|
log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
|
||||||
|
pipe_known_urls = extract_known_urls_from_result(result)
|
||||||
|
|
||||||
|
# Resolve media path: get from piped result
|
||||||
|
# Support both object attributes (getattr) and dict keys (get)
|
||||||
|
target = None
|
||||||
|
origin = None
|
||||||
|
|
||||||
|
# Try object attributes first
|
||||||
|
if hasattr(result, "target"):
|
||||||
|
target = result.target
|
||||||
|
elif hasattr(result, "path"):
|
||||||
|
target = result.path
|
||||||
|
elif hasattr(result, "file_path"):
|
||||||
|
target = result.file_path
|
||||||
|
# Try dict keys if object attributes failed
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
target = (result.get("target") or result.get("path") or result.get("file_path") or
|
||||||
|
result.get("__file_path") or result.get("__path") or result.get("__target"))
|
||||||
|
|
||||||
|
# Get origin to detect Hydrus files
|
||||||
|
if hasattr(result, "origin"):
|
||||||
|
origin = result.origin
|
||||||
|
elif hasattr(result, "source"):
|
||||||
|
origin = result.source
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
origin = result.get("origin") or result.get("source") or result.get("__source")
|
||||||
|
|
||||||
|
# Convert target to string and preserve URLs (don't let Path() mangle them)
|
||||||
|
target_str = str(target) if target else None
|
||||||
|
|
||||||
|
# Check if this is a playlist item that needs to be downloaded first
|
||||||
|
is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
|
||||||
|
if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
|
||||||
|
# This is a playlist item URL - we need to download it first
|
||||||
|
log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Extract item number if available
|
||||||
|
item_num = None
|
||||||
|
if "__action" in result and result["__action"].startswith("playlist-item:"):
|
||||||
|
item_num = result["__action"].split(":")[1]
|
||||||
|
elif "index" in result:
|
||||||
|
item_num = result["index"]
|
||||||
|
|
||||||
|
# Call download-data to download this specific item
|
||||||
|
# Pass the item number so it knows which track to download
|
||||||
|
from cmdlets import download_data as dl_module
|
||||||
|
|
||||||
|
# Capture emissions from download-data to process them
|
||||||
|
captured_results = []
|
||||||
|
original_emit = ctx.emit
|
||||||
|
|
||||||
|
def capture_emit(obj):
|
||||||
|
captured_results.append(obj)
|
||||||
|
# Also emit to original so user sees progress/output if needed
|
||||||
|
# But since add-file is usually terminal, we might not need to
|
||||||
|
# original_emit(obj)
|
||||||
|
|
||||||
|
# Temporarily hook the pipeline emit function
|
||||||
|
ctx.emit = capture_emit
|
||||||
|
|
||||||
|
try:
|
||||||
|
if item_num:
|
||||||
|
# Pass a marker dict to tell download-data which item to get
|
||||||
|
download_result = dl_module._run(
|
||||||
|
{
|
||||||
|
"__playlist_url": str(target_str),
|
||||||
|
"__playlist_item": int(item_num)
|
||||||
|
},
|
||||||
|
[],
|
||||||
|
config
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Fallback: just download the URL (will show all items)
|
||||||
|
download_result = dl_module._run(None, [str(target_str)], config)
|
||||||
|
finally:
|
||||||
|
# Restore original emit function
|
||||||
|
ctx.emit = original_emit
|
||||||
|
|
||||||
|
if download_result != 0:
|
||||||
|
log(f"❌ Failed to download playlist item", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
|
||||||
|
|
||||||
|
# Process the downloaded files recursively
|
||||||
|
success_count = 0
|
||||||
|
for res in captured_results:
|
||||||
|
# Recursively call add-file with the downloaded result
|
||||||
|
# This ensures tags and metadata from download-data are applied
|
||||||
|
if _run(res, _args, config) == 0:
|
||||||
|
success_count += 1
|
||||||
|
|
||||||
|
return 0 if success_count > 0 else 1
|
||||||
|
# Determine media_path from result
|
||||||
|
media_path: Optional[Path] = None
|
||||||
|
is_hydrus_file = origin and origin.lower() == "hydrus"
|
||||||
|
|
||||||
|
if target_str:
|
||||||
|
# Check if it's a URL or Hydrus hash
|
||||||
|
if target_str.lower().startswith(("http://", "https://")):
|
||||||
|
media_path = None # Will handle as Hydrus file below
|
||||||
|
elif not is_hydrus_file:
|
||||||
|
# Only treat as local path if not a Hydrus file
|
||||||
|
media_path = Path(target_str)
|
||||||
|
|
||||||
|
if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
|
||||||
|
# Check if this is a format object from download-data
|
||||||
|
if isinstance(result, dict) and result.get('format_id') is not None:
|
||||||
|
log("❌ Format object received, but add-file expects a downloaded file")
|
||||||
|
log(f" Tip: Use @N to automatically select and download the format")
|
||||||
|
log(f" Streamlined workflow:")
|
||||||
|
log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
|
||||||
|
log(f" (The @N automatically expands to download-data \"URL\" -item N)")
|
||||||
|
return 1
|
||||||
|
log("❌ File not found: provide a piped file result or local file path")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Check if this is a Hydrus file - fetch the actual file path from Hydrus
|
||||||
|
if is_hydrus_file and target_str:
|
||||||
|
log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
from helper import hydrus
|
||||||
|
|
||||||
|
# Get the Hydrus client
|
||||||
|
client = hydrus.get_client(config)
|
||||||
|
if not client:
|
||||||
|
log(f"❌ Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# target_str is the hash - need to get the actual file path from Hydrus
|
||||||
|
file_hash = target_str
|
||||||
|
|
||||||
|
# Call the /get_files/file_path endpoint to get the actual file path
|
||||||
|
response = client.get_file_path(file_hash)
|
||||||
|
if not response or not isinstance(response, dict):
|
||||||
|
log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
file_path_str = response.get("path")
|
||||||
|
if not file_path_str:
|
||||||
|
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
media_path = Path(file_path_str)
|
||||||
|
if not media_path.exists():
|
||||||
|
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Generic URL handler: if target is a URL and we haven't resolved a local path yet
|
||||||
|
# This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
|
||||||
|
if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
|
||||||
|
log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
|
||||||
|
from cmdlets import download_data as dl_module
|
||||||
|
|
||||||
|
dl_args = []
|
||||||
|
if location:
|
||||||
|
dl_args.extend(["-storage", location])
|
||||||
|
|
||||||
|
# Map provider 0x0 to storage 0x0 for download-data
|
||||||
|
if provider_name == "0x0":
|
||||||
|
dl_args.extend(["-storage", "0x0"])
|
||||||
|
|
||||||
|
return dl_module._run(result, dl_args, config)
|
||||||
|
|
||||||
|
if media_path is None:
|
||||||
|
log("File path could not be resolved")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if not media_path.exists() or not media_path.is_file():
|
||||||
|
log(f"File not found: {media_path}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Validate file type - only accept Hydrus-supported files
|
||||||
|
file_extension = media_path.suffix.lower()
|
||||||
|
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
|
||||||
|
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
|
||||||
|
log(f"Hydrus supports the following file types:", file=sys.stderr)
|
||||||
|
# Display by category from hydrus_wrapper
|
||||||
|
for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
|
||||||
|
ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
|
||||||
|
log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
|
||||||
|
log(f"Skipping this file: {media_path.name}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle based on provider or storage
|
||||||
|
if provider_name is not None:
|
||||||
|
# Use file provider (e.g., 0x0.st)
|
||||||
|
from helper.search_provider import get_file_provider
|
||||||
|
|
||||||
|
log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_provider = get_file_provider(provider_name, config)
|
||||||
|
if file_provider is None:
|
||||||
|
log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
hoster_url = file_provider.upload(media_path)
|
||||||
|
log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Associate the URL with the file in Hydrus if possible
|
||||||
|
current_hash = locals().get('file_hash')
|
||||||
|
if not current_hash:
|
||||||
|
current_hash = _resolve_file_hash(result, None, media_path)
|
||||||
|
|
||||||
|
if current_hash:
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
if client:
|
||||||
|
client.associate_url(current_hash, hoster_url)
|
||||||
|
log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ {provider_name} upload failed: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if delete_after_upload:
|
||||||
|
try:
|
||||||
|
media_path.unlink()
|
||||||
|
_cleanup_sidecar_files(media_path)
|
||||||
|
log(f"✅ Deleted file and sidecar", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Handle storage-based operations (location is not None here)
|
||||||
|
valid_locations = {'hydrus', 'local'}
|
||||||
|
is_valid_location = location in valid_locations
|
||||||
|
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
|
||||||
|
|
||||||
|
if not (is_valid_location or is_local_path):
|
||||||
|
log(f"❌ Invalid location: {location}")
|
||||||
|
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if location == 'local':
|
||||||
|
try:
|
||||||
|
from config import get_local_storage_path
|
||||||
|
resolved_dir = get_local_storage_path(config)
|
||||||
|
except Exception:
|
||||||
|
resolved_dir = None
|
||||||
|
|
||||||
|
if not resolved_dir:
|
||||||
|
resolved_dir = config.get("LocalDir") or config.get("OutputDir")
|
||||||
|
|
||||||
|
if not resolved_dir:
|
||||||
|
log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
|
||||||
|
exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
|
||||||
|
|
||||||
|
# After successful local transfer, emit result for pipeline continuation
|
||||||
|
# This allows downstream commands like add-tags to chain automatically
|
||||||
|
if exit_code == 0 and dest_path:
|
||||||
|
# Extract tags from result for emission
|
||||||
|
emit_tags = extract_tags_from_result(result)
|
||||||
|
file_hash = _resolve_file_hash(result, None, dest_path)
|
||||||
|
|
||||||
|
# Extract title from original result, fallback to filename if not available
|
||||||
|
result_title = extract_title_from_result(result) or dest_path.name
|
||||||
|
|
||||||
|
# Always emit result for local files, even if no tags
|
||||||
|
# This allows @N selection and piping to downstream commands
|
||||||
|
result_dict = create_pipe_object_result(
|
||||||
|
source='local',
|
||||||
|
identifier=str(dest_path),
|
||||||
|
file_path=str(dest_path),
|
||||||
|
cmdlet_name='add-file',
|
||||||
|
title=result_title,
|
||||||
|
file_hash=file_hash,
|
||||||
|
tags=emit_tags if emit_tags else [],
|
||||||
|
target=str(dest_path) # Explicit target for get-file
|
||||||
|
)
|
||||||
|
ctx.emit(result_dict)
|
||||||
|
|
||||||
|
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||||
|
# Next stage will use these local file results, not format objects
|
||||||
|
ctx.set_current_stage_table(None)
|
||||||
|
|
||||||
|
return exit_code
|
||||||
|
|
||||||
|
elif is_local_path:
|
||||||
|
try:
|
||||||
|
destination_root = Path(location)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"Moving to local path: {destination_root}", file=sys.stderr)
|
||||||
|
exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
|
||||||
|
|
||||||
|
# After successful local transfer, emit result for pipeline continuation
|
||||||
|
if exit_code == 0 and dest_path:
|
||||||
|
# Extract tags from result for emission
|
||||||
|
emit_tags = extract_tags_from_result(result)
|
||||||
|
file_hash = _resolve_file_hash(result, None, dest_path)
|
||||||
|
|
||||||
|
# Extract title from original result, fallback to filename if not available
|
||||||
|
result_title = extract_title_from_result(result) or dest_path.name
|
||||||
|
|
||||||
|
# Always emit result for local files, even if no tags
|
||||||
|
# This allows @N selection and piping to downstream commands
|
||||||
|
result_dict = create_pipe_object_result(
|
||||||
|
source='local',
|
||||||
|
identifier=str(dest_path),
|
||||||
|
file_path=str(dest_path),
|
||||||
|
cmdlet_name='add-file',
|
||||||
|
title=result_title,
|
||||||
|
file_hash=file_hash,
|
||||||
|
tags=emit_tags if emit_tags else [],
|
||||||
|
target=str(dest_path) # Explicit target for get-file
|
||||||
|
)
|
||||||
|
ctx.emit(result_dict)
|
||||||
|
|
||||||
|
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||||
|
# Next stage will use these local file results, not format objects
|
||||||
|
ctx.set_current_stage_table(None)
|
||||||
|
|
||||||
|
return exit_code
|
||||||
|
|
||||||
|
# location == 'hydrus'
|
||||||
|
# Compute file hash to check if already in Hydrus
|
||||||
|
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
|
||||||
|
log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
file_hash = sha256_file(media_path)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
log(f"File hash: {file_hash}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Read sidecar tags and known URLs first (for tagging)
|
||||||
|
|
||||||
|
sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
|
||||||
|
if sidecar_path:
|
||||||
|
log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
|
||||||
|
log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
|
||||||
|
if sidecar_tags:
|
||||||
|
log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
|
||||||
|
if sidecar_urls:
|
||||||
|
log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Normalize all title tags to use spaces instead of underscores BEFORE merging
|
||||||
|
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
|
||||||
|
def normalize_title_tag(tag: str) -> str:
|
||||||
|
"""Normalize a title tag by replacing underscores with spaces."""
|
||||||
|
if str(tag).strip().lower().startswith("title:"):
|
||||||
|
parts = tag.split(":", 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
value = parts[1].replace("_", " ").strip()
|
||||||
|
return f"title:{value}"
|
||||||
|
return tag
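# Illustrative examples (not part of the original commit) of the normalization above,
# using hypothetical tag values:
#   normalize_title_tag("title:Radiohead_-_Creep")  ->  "title:Radiohead - Creep"
#   normalize_title_tag("artist:Radiohead")         ->  "artist:Radiohead"   (non-title tags pass through unchanged)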
|
||||||
|
|
||||||
|
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
|
||||||
|
pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
|
||||||
|
|
||||||
|
# Merge tags from PipeObject with tags from sidecar
|
||||||
|
# NOTE: Remove ALL existing title tags and use only filename-based title
|
||||||
|
# The filename is the source of truth for the title
|
||||||
|
tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
|
||||||
|
if not str(t).strip().lower().startswith("title:")]
|
||||||
|
|
||||||
|
# Ensure ONE title tag based on the actual filename
|
||||||
|
filename_title = media_path.stem.replace("_", " ").strip()
|
||||||
|
if filename_title:
|
||||||
|
tags = [f"title:{filename_title}"] + tags_without_titles
|
||||||
|
else:
|
||||||
|
tags = tags_without_titles
|
||||||
|
|
||||||
|
known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
|
||||||
|
|
||||||
|
if pipe_object_tags:
|
||||||
|
log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Write metadata to file before uploading (only for local storage, not for Hydrus)
|
||||||
|
# Hydrus stores tags separately, so we don't need to modify the file
|
||||||
|
if location != 'hydrus':
|
||||||
|
try:
|
||||||
|
if tags:
|
||||||
|
# Determine file kind from extension
|
||||||
|
file_kind = ''
|
||||||
|
sfx = media_path.suffix.lower()
|
||||||
|
if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
|
||||||
|
file_kind = 'audio'
|
||||||
|
elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||||
|
file_kind = 'video'
|
||||||
|
|
||||||
|
if embed_metadata_in_file(media_path, tags, file_kind):
|
||||||
|
log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
|
||||||
|
|
||||||
|
# Use FileStorage backend to upload to Hydrus
|
||||||
|
try:
|
||||||
|
file_hash = storage["hydrus"].upload(
|
||||||
|
media_path,
|
||||||
|
config=config,
|
||||||
|
tags=tags,
|
||||||
|
)
|
||||||
|
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Associate known URLs in Hydrus metadata
|
||||||
|
url_count = 0
|
||||||
|
if known_urls:
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
if client:
|
||||||
|
for url in known_urls:
|
||||||
|
u = str(url or "").strip()
|
||||||
|
if not u:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
client.associate_url(file_hash, u)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
|
||||||
|
continue
|
||||||
|
url_count += 1
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to associate URLs: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
if url_count:
|
||||||
|
log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"No URLs to associate", file=sys.stderr)
|
||||||
|
|
||||||
|
_cleanup_sidecar_files(media_path, sidecar_path)
|
||||||
|
|
||||||
|
# Update in-memory result for downstream pipes
|
||||||
|
try:
|
||||||
|
# Only update piped result objects; direct -path usage may have a dummy result
|
||||||
|
setattr(result, "hash_hex", file_hash)
|
||||||
|
# Preserve media_kind for downstream commands (e.g., open)
|
||||||
|
if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
|
||||||
|
# Try to infer media_kind from file extension or keep existing
|
||||||
|
suffix = media_path.suffix.lower()
|
||||||
|
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
|
||||||
|
setattr(result, "media_kind", "document")
|
||||||
|
if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
|
||||||
|
cols = list(getattr(result, "columns"))
|
||||||
|
if ("Hash", file_hash) not in cols:
|
||||||
|
cols.append(("Hash", file_hash))
|
||||||
|
setattr(result, "columns", cols)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If -delete flag is set, delete the file and .tags after successful upload
|
||||||
|
if delete_after_upload:
|
||||||
|
log(f"Deleting local files (as requested)...", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
media_path.unlink()
|
||||||
|
log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
|
||||||
|
except OSError as exc:
|
||||||
|
log(f"Failed to delete file: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Delete .tags sidecar if it exists
|
||||||
|
if sidecar_path is not None:
|
||||||
|
try:
|
||||||
|
sidecar_path.unlink()
|
||||||
|
log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
|
||||||
|
except OSError as exc:
|
||||||
|
log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
|
||||||
|
|
||||||
|
# Emit result for Hydrus uploads so downstream commands know about it
|
||||||
|
if location == 'hydrus':
|
||||||
|
# Extract title from original result, fallback to filename if not available
|
||||||
|
result_title = extract_title_from_result(result) or media_path.name
|
||||||
|
|
||||||
|
result_dict = create_pipe_object_result(
|
||||||
|
source='hydrus',
|
||||||
|
identifier=file_hash,
|
||||||
|
file_path=f"hydrus:{file_hash}",
|
||||||
|
cmdlet_name='add-file',
|
||||||
|
title=result_title,
|
||||||
|
file_hash=file_hash,
|
||||||
|
extra={
|
||||||
|
'storage_source': 'hydrus',
|
||||||
|
'hydrus_hash': file_hash,
|
||||||
|
'tags': tags,
|
||||||
|
'known_urls': known_urls,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
ctx.emit(result_dict)
|
||||||
|
|
||||||
|
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||||
|
# Next stage will use these Hydrus file results, not format objects
|
||||||
|
ctx.set_current_stage_table(None)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="add-file",
|
||||||
|
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
|
||||||
|
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
|
||||||
|
args=[
|
||||||
|
CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
|
||||||
|
SharedArgs.STORAGE, # For hydrus, local, or directory paths
|
||||||
|
CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
|
||||||
|
CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Storage location options (use -storage):",
|
||||||
|
" hydrus: Upload to Hydrus database with metadata tagging",
|
||||||
|
" local: Copy file to local directory",
|
||||||
|
" <path>: Copy file to specified directory",
|
||||||
|
"- File provider options (use -provider):",
|
||||||
|
" 0x0: Upload to 0x0.st for temporary hosting with public URL",
|
||||||
|
"- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
|
||||||
|
"- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
|
||||||
|
"- When using file provider: uploads to service, adds URL to sidecar",
|
||||||
|
"- When copying locally: copies file with original metadata preserved",
|
||||||
|
"- Use -delete flag to automatically delete the file and .tags after successful operation.",
|
||||||
|
],
|
||||||
|
)
|
||||||
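# Illustrative usage sketch (not part of the original commit). The paths, URL and tag
# below are hypothetical; the syntax follows the usage string and details above:
#
#   add-file -path "C:\Music\song.mp3" -storage hydrus -delete
#   add-file -path "/home/user/clip.mp4" -storage "D:\Archive"
#   download-data <url> | add-file -storage local | add-tags "artist:unknown"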
84
cmdlets/add_note.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
import json
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="add-note",
|
||||||
|
summary="Add or set a note on a Hydrus file.",
|
||||||
|
usage="add-note [-hash <sha256>] <name> <text>",
|
||||||
|
args=[
|
||||||
|
CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
|
||||||
|
CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Notes are stored in the 'my notes' service by default.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@register(["add-note", "set-note", "add_note"]) # aliases
|
||||||
|
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
from ._shared import parse_cmdlet_args
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
override_hash = parsed.get("hash")
|
||||||
|
name = parsed.get("name")
|
||||||
|
text_parts = parsed.get("text")
|
||||||
|
|
||||||
|
if not name:
|
||||||
|
log("Requires a note name")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
name = str(name).strip()
|
||||||
|
|
||||||
|
if isinstance(text_parts, list):
|
||||||
|
text = " ".join(text_parts).strip()
|
||||||
|
else:
|
||||||
|
text = str(text_parts or "").strip()
|
||||||
|
|
||||||
|
if not text:
|
||||||
|
log("Empty note text")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
if not hash_hex:
|
||||||
|
log("Selected result does not include a Hydrus hash")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
service_name = "my notes"
|
||||||
|
client.set_notes(hash_hex, {name: text}, service_name)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus add-note failed: {exc}")
|
||||||
|
return 1
|
||||||
|
ctx.emit(f"Added note '{name}' ({len(text)} chars)")
|
||||||
|
return 0
|
||||||
|
|
||||||
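# Illustrative usage sketch (not part of the original commit). The hash placeholder and
# note text are hypothetical; the syntax follows the usage string above:
#
#   @1 | add-note comment "first listen, verified pressing"
#   add-note -hash <sha256> source "ripped from the original CD"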
264
cmdlets/add_relationship.py
Normal file
@@ -0,0 +1,264 @@
|
|||||||
|
"""Add file relationships in Hydrus based on relationship tags in sidecar."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional, Sequence
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||||
|
from helper.local_library import read_sidecar, find_sidecar
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="add-relationship",
|
||||||
|
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
|
||||||
|
usage="add-relationship OR add-relationship -path <file>",
|
||||||
|
args=[
|
||||||
|
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
|
||||||
|
"- Calls Hydrus API to associate the hashes as relationships",
|
||||||
|
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
|
||||||
|
"- Works with piped file results or -path argument for direct invocation",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
|
||||||
|
"""Normalize a hash hex string to lowercase 64-char format."""
|
||||||
|
if not value or not isinstance(value, str):
|
||||||
|
return None
|
||||||
|
normalized = value.strip().lower()
|
||||||
|
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
|
||||||
|
return normalized
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
|
||||||
|
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
|
||||||
|
|
||||||
|
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
|
||||||
|
"""
|
||||||
|
result: Dict[str, list[str]] = {}
|
||||||
|
if not isinstance(tag_value, str):
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
|
||||||
|
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
|
||||||
|
matches = re.findall(pattern, tag_value)
|
||||||
|
|
||||||
|
for rel_type, hash_value in matches:
|
||||||
|
normalized = _normalise_hash_hex(hash_value)
|
||||||
|
if normalized:
|
||||||
|
if rel_type not in result:
|
||||||
|
result[rel_type] = []
|
||||||
|
result[rel_type].append(normalized)
|
||||||
|
|
||||||
|
return result
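# Illustrative sketch (not part of the original commit) of what the parser above is
# expected to return for the sidecar format described in CMDLET.details. The hashes
# shown are hypothetical and abbreviated; real values must be 64 hex characters:
#
#   _extract_relationships_from_tag("relationship: hash(king)aaaa...,hash(alt)bbbb...")
#   # -> {"king": ["aaaa..."], "alt": ["bbbb..."]}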
|
||||||
|
|
||||||
|
|
||||||
|
@register(["add-relationship", "add-rel"]) # primary name and alias
|
||||||
|
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Associate file relationships in Hydrus.
|
||||||
|
|
||||||
|
Two modes of operation:
|
||||||
|
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
|
||||||
|
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
|
||||||
|
|
||||||
|
Returns 0 on success, non-zero on failure.
|
||||||
|
"""
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse arguments using CMDLET spec
|
||||||
|
parsed = parse_cmdlet_args(_args, CMDLET)
|
||||||
|
arg_path: Optional[Path] = None
|
||||||
|
if parsed:
|
||||||
|
# Get the first arg value (e.g., -path)
|
||||||
|
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
|
||||||
|
if first_arg_name and first_arg_name in parsed:
|
||||||
|
arg_value = parsed[first_arg_name]
|
||||||
|
try:
|
||||||
|
arg_path = Path(str(arg_value)).expanduser()
|
||||||
|
except Exception:
|
||||||
|
arg_path = Path(str(arg_value))
|
||||||
|
|
||||||
|
# Get Hydrus client
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
# Check if we're in pipeline mode (have a hash) or file mode
|
||||||
|
file_hash = getattr(result, "hash_hex", None)
|
||||||
|
|
||||||
|
# PIPELINE MODE: Track relationships across multiple items
|
||||||
|
if file_hash:
|
||||||
|
file_hash = _normalise_hash_hex(file_hash)
|
||||||
|
if not file_hash:
|
||||||
|
log("Invalid file hash format", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Load or initialize king hash from pipeline context
|
||||||
|
try:
|
||||||
|
king_hash = ctx.load_value("relationship_king")
|
||||||
|
except Exception:
|
||||||
|
king_hash = None
|
||||||
|
|
||||||
|
# If this is the first item, make it the king
|
||||||
|
if not king_hash:
|
||||||
|
try:
|
||||||
|
ctx.store_value("relationship_king", file_hash)
|
||||||
|
log(f"Established king hash: {file_hash}", file=sys.stderr)
|
||||||
|
return 0 # First item just becomes the king, no relationships yet
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If we already have a king and this is a different hash, link them
|
||||||
|
if king_hash and king_hash != file_hash:
|
||||||
|
try:
|
||||||
|
client.set_relationship(file_hash, king_hash, "alt")
|
||||||
|
log(
|
||||||
|
f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
|
||||||
|
file=sys.stderr
|
||||||
|
)
|
||||||
|
return 0
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to set relationship: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# FILE MODE: Read relationships from sidecar
|
||||||
|
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
|
||||||
|
|
||||||
|
# Resolve media path from -path arg or result target
|
||||||
|
target = getattr(result, "target", None) or getattr(result, "path", None)
|
||||||
|
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
|
||||||
|
if media_path is None:
|
||||||
|
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Validate local file
|
||||||
|
if str(media_path).lower().startswith(("http://", "https://")):
|
||||||
|
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
if not media_path.exists() or not media_path.is_file():
|
||||||
|
log(f"File not found: {media_path}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Build Hydrus client
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Read sidecar to find relationship tags
|
||||||
|
sidecar_path = find_sidecar(media_path)
|
||||||
|
if sidecar_path is None:
|
||||||
|
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
_, tags, _ = read_sidecar(sidecar_path)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
|
||||||
|
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
|
||||||
|
|
||||||
|
if not relationship_tags:
|
||||||
|
log(f"No relationship tags found in sidecar", file=sys.stderr)
|
||||||
|
return 0 # Not an error, just nothing to do
|
||||||
|
|
||||||
|
# Get the file hash from result (should have been set by add-file)
|
||||||
|
file_hash = getattr(result, "hash_hex", None)
|
||||||
|
if not file_hash:
|
||||||
|
log("File hash not available (run add-file first)", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
file_hash = _normalise_hash_hex(file_hash)
|
||||||
|
if not file_hash:
|
||||||
|
log("Invalid file hash format", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Parse relationships from tags and apply them
|
||||||
|
success_count = 0
|
||||||
|
error_count = 0
|
||||||
|
|
||||||
|
for rel_tag in relationship_tags:
|
||||||
|
try:
|
||||||
|
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
|
||||||
|
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
|
||||||
|
|
||||||
|
# Parse relationships
|
||||||
|
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
|
||||||
|
|
||||||
|
# Set the relationships in Hydrus
|
||||||
|
for rel_type, related_hashes in rels.items():
|
||||||
|
if not related_hashes:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for related_hash in related_hashes:
|
||||||
|
# Don't set relationship between hash and itself
|
||||||
|
if file_hash == related_hash:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
client.set_relationship(file_hash, related_hash, rel_type)
|
||||||
|
log(
|
||||||
|
f"[add-relationship] Set {rel_type} relationship: "
|
||||||
|
f"{file_hash} <-> {related_hash}",
|
||||||
|
file=sys.stderr
|
||||||
|
)
|
||||||
|
success_count += 1
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
|
||||||
|
error_count += 1
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
|
||||||
|
error_count += 1
|
||||||
|
|
||||||
|
if success_count > 0:
|
||||||
|
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
|
||||||
|
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
|
||||||
|
return 0
|
||||||
|
elif error_count == 0:
|
||||||
|
log(f"No relationships to set", file=sys.stderr)
|
||||||
|
return 0 # Success with nothing to do
|
||||||
|
else:
|
||||||
|
log(f"Failed with {error_count} error(s)", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
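# Illustrative usage sketch (not part of the original commit); the file path is hypothetical:
#
#   # Pipeline mode: the first piped item becomes the king, later items become alts
#   1,2,3 | add-relationship
#
#   # File mode: read relationship tags from the file's sidecar
#   add-relationship -path "/home/user/music/track.flac"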
276
cmdlets/add_tags.py
Normal file
@@ -0,0 +1,276 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Sequence, Optional
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from ._shared import normalize_result_input, filter_results_by_temp
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, has_sidecar, LocalLibraryDB
|
||||||
|
from metadata import rename_by_metadata
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args
|
||||||
|
from config import get_local_storage_path
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="add-tags",
|
||||||
|
summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
|
||||||
|
usage="add-tags [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||||
|
args=[
|
||||||
|
CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||||
|
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||||
|
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
|
||||||
|
CmdletArg("tags", type="string", required=True, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax.", variadic=True),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
|
||||||
|
"- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
|
||||||
|
"- With a Hydrus hash, tags are sent to the 'my tags' service.",
|
||||||
|
"- Multiple tags can be comma-separated or space-separated.",
|
||||||
|
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
|
||||||
|
"- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
|
||||||
|
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
|
||||||
|
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
|
||||||
|
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||||
|
"- The source namespace must already exist in the file being tagged.",
|
||||||
|
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
@register(["add-tag", "add-tags"])
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Add tags to a file with smart filtering for pipeline results."""
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
|
||||||
|
# Check for --all flag
|
||||||
|
include_temp = parsed.get("all", False)
|
||||||
|
|
||||||
|
# Normalize input to list
|
||||||
|
results = normalize_result_input(result)
|
||||||
|
|
||||||
|
# Filter by temp status (unless --all is set)
|
||||||
|
if not include_temp:
|
||||||
|
results = filter_results_by_temp(results, include_temp=False)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Get tags from arguments
|
||||||
|
raw_tags = parsed.get("tags", [])
|
||||||
|
if isinstance(raw_tags, str):
|
||||||
|
raw_tags = [raw_tags]
|
||||||
|
|
||||||
|
# Handle -list argument (convert to {list} syntax)
|
||||||
|
list_arg = parsed.get("list")
|
||||||
|
if list_arg:
|
||||||
|
for l in list_arg.split(','):
|
||||||
|
l = l.strip()
|
||||||
|
if l:
|
||||||
|
raw_tags.append(f"{{{l}}}")
|
||||||
|
|
||||||
|
# Parse and expand tags
|
||||||
|
tags_to_add = parse_tag_arguments(raw_tags)
|
||||||
|
tags_to_add = expand_tag_groups(tags_to_add)
|
||||||
|
|
||||||
|
# Get other flags
|
||||||
|
hash_override = normalize_hash(parsed.get("hash"))
|
||||||
|
duplicate_arg = parsed.get("duplicate")
|
||||||
|
|
||||||
|
# If no tags provided (and no list), write sidecar files with embedded tags
|
||||||
|
# Note: Since 'tags' is required=True in CMDLET, this block might be unreachable via CLI
|
||||||
|
# unless called programmatically or if required check is bypassed.
|
||||||
|
if not tags_to_add and not duplicate_arg:
|
||||||
|
# Write sidecar files with the tags that are already in the result dicts
|
||||||
|
sidecar_count = 0
|
||||||
|
for res in results:
|
||||||
|
# Handle both dict and PipeObject formats
|
||||||
|
file_path = None
|
||||||
|
tags = []
|
||||||
|
file_hash = ""
|
||||||
|
|
||||||
|
if isinstance(res, models.PipeObject):
|
||||||
|
file_path = res.file_path
|
||||||
|
tags = res.extra.get('tags', [])
|
||||||
|
file_hash = res.file_hash or ""
|
||||||
|
elif isinstance(res, dict):
|
||||||
|
file_path = res.get('file_path')
|
||||||
|
tags = res.get('tags', []) # Check both tags and extra['tags']
|
||||||
|
if not tags and 'extra' in res:
|
||||||
|
tags = res['extra'].get('tags', [])
|
||||||
|
file_hash = res.get('file_hash', "")
|
||||||
|
|
||||||
|
if not file_path:
|
||||||
|
log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr)
|
||||||
|
ctx.emit(res)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if tags:
|
||||||
|
# Write sidecar file for this file with its tags
|
||||||
|
try:
|
||||||
|
sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
|
||||||
|
log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
|
||||||
|
sidecar_count += 1
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
ctx.emit(res)
|
||||||
|
|
||||||
|
if sidecar_count > 0:
|
||||||
|
log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Tags ARE provided - append them to each result and write sidecar files or add to Hydrus
|
||||||
|
sidecar_count = 0
|
||||||
|
for res in results:
|
||||||
|
# Handle both dict and PipeObject formats
|
||||||
|
file_path = None
|
||||||
|
existing_tags = []
|
||||||
|
file_hash = ""
|
||||||
|
storage_source = None
|
||||||
|
hydrus_hash = None
|
||||||
|
|
||||||
|
if isinstance(res, models.PipeObject):
|
||||||
|
file_path = res.file_path
|
||||||
|
existing_tags = res.extra.get('tags', [])
|
||||||
|
file_hash = res.file_hash or ""
|
||||||
|
storage_source = res.extra.get('storage_source') or res.extra.get('source')
|
||||||
|
hydrus_hash = res.extra.get('hydrus_hash')
|
||||||
|
elif isinstance(res, dict):
|
||||||
|
file_path = res.get('file_path') or res.get('path')
|
||||||
|
existing_tags = res.get('tags', [])
|
||||||
|
if not existing_tags and 'extra' in res:
|
||||||
|
existing_tags = res['extra'].get('tags', [])
|
||||||
|
file_hash = res.get('file_hash', "")
|
||||||
|
storage_source = res.get('storage_source') or res.get('source') or res.get('origin')
|
||||||
|
if not storage_source and 'extra' in res:
|
||||||
|
storage_source = res['extra'].get('storage_source') or res['extra'].get('source')
|
||||||
|
# For Hydrus results from search-file, look for hash, hash_hex, or target (all contain the hash)
|
||||||
|
hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex')
|
||||||
|
if not hydrus_hash and 'extra' in res:
|
||||||
|
hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex')
|
||||||
|
else:
|
||||||
|
ctx.emit(res)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Apply hash override if provided
|
||||||
|
if hash_override:
|
||||||
|
hydrus_hash = hash_override
|
||||||
|
# If we have a hash override, we treat it as a Hydrus target
|
||||||
|
storage_source = "hydrus"
|
||||||
|
|
||||||
|
if not file_path and not hydrus_hash:
|
||||||
|
log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr)
|
||||||
|
ctx.emit(res)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle -duplicate logic (copy existing tags to new namespaces)
|
||||||
|
if duplicate_arg:
|
||||||
|
# Parse duplicate format: source:target1,target2 or source,target1,target2
|
||||||
|
parts = duplicate_arg.split(':')
|
||||||
|
source_ns = ""
|
||||||
|
targets = []
|
||||||
|
|
||||||
|
if len(parts) > 1:
|
||||||
|
# Explicit format: source:target1,target2
|
||||||
|
source_ns = parts[0]
|
||||||
|
targets = parts[1].split(',')
|
||||||
|
else:
|
||||||
|
# Inferred format: source,target1,target2
|
||||||
|
parts = duplicate_arg.split(',')
|
||||||
|
if len(parts) > 1:
|
||||||
|
source_ns = parts[0]
|
||||||
|
targets = parts[1:]
|
||||||
|
|
||||||
|
if source_ns and targets:
|
||||||
|
# Find tags in source namespace
|
||||||
|
source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')]
|
||||||
|
for t in source_tags:
|
||||||
|
value = t.split(':', 1)[1]
|
||||||
|
for target_ns in targets:
|
||||||
|
new_tag = f"{target_ns}:{value}"
|
||||||
|
if new_tag not in existing_tags and new_tag not in tags_to_add:
|
||||||
|
tags_to_add.append(new_tag)
|
||||||
|
|
||||||
|
# Merge new tags with existing tags, handling namespace overwrites
|
||||||
|
# When adding a tag like "namespace:value", remove any existing "namespace:*" tags
|
||||||
|
for new_tag in tags_to_add:
|
||||||
|
# Check if this is a namespaced tag (format: "namespace:value")
|
||||||
|
if ':' in new_tag:
|
||||||
|
namespace = new_tag.split(':', 1)[0]
|
||||||
|
# Remove any existing tags with the same namespace
|
||||||
|
existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))]
|
||||||
|
|
||||||
|
# Add the new tag if not already present
|
||||||
|
if new_tag not in existing_tags:
|
||||||
|
existing_tags.append(new_tag)
|
||||||
|
|
||||||
|
# Update the result's tags
|
||||||
|
if isinstance(res, models.PipeObject):
|
||||||
|
res.extra['tags'] = existing_tags
|
||||||
|
elif isinstance(res, dict):
|
||||||
|
res['tags'] = existing_tags
|
||||||
|
|
||||||
|
# Determine where to add tags: Hydrus, local DB, or sidecar
|
||||||
|
if storage_source and storage_source.lower() == 'hydrus':
|
||||||
|
# Add tags to Hydrus using the API
|
||||||
|
target_hash = hydrus_hash or file_hash
|
||||||
|
if target_hash:
|
||||||
|
try:
|
||||||
|
log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
|
||||||
|
hydrus_client = hydrus_wrapper.get_client(config)
|
||||||
|
hydrus_client.add_tags(target_hash, existing_tags, "my tags")
|
||||||
|
log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr)
|
||||||
|
sidecar_count += 1
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
|
||||||
|
elif storage_source and storage_source.lower() == 'local':
|
||||||
|
# For local storage, save directly to DB (no sidecar needed)
|
||||||
|
if file_path:
|
||||||
|
library_root = get_local_storage_path(config)
|
||||||
|
if library_root:
|
||||||
|
try:
|
||||||
|
with LocalLibraryDB(library_root) as db:
|
||||||
|
db.save_tags(Path(file_path), existing_tags)
|
||||||
|
log(f"[add_tags] Saved {len(existing_tags)} tag(s) to local DB", file=sys.stderr)
|
||||||
|
sidecar_count += 1
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
# For other storage types or unknown sources, write sidecar file if we have a file path
|
||||||
|
if file_path:
|
||||||
|
try:
|
||||||
|
sidecar_path = write_sidecar(Path(file_path), existing_tags, [], file_hash)
|
||||||
|
log(f"[add_tags] Wrote {len(existing_tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
|
||||||
|
sidecar_count += 1
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Emit the modified result
|
||||||
|
ctx.emit(res)
|
||||||
|
|
||||||
|
log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr)
|
||||||
|
return 0
|
||||||
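# Illustrative usage sketch (not part of the original commit); tag values are hypothetical
# and the syntax follows the usage string and details above:
#
#   @1 | add-tags "artist:Radiohead" "album:Pablo Honey"
#   @1 | add-tags -list philosophy,occult
#   @1 | add-tags -duplicate title:album,artist      # copy the existing title: value into album: and artist:
#   download-data <url> | add-file -storage local | add-tags "creator:unknown" --all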
78
cmdlets/add_url.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
import json
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="add-url",
|
||||||
|
summary="Associate a URL with a Hydrus file.",
|
||||||
|
usage="add-url [-hash <sha256>] <url>",
|
||||||
|
args=[
|
||||||
|
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
CmdletArg("url", required=True, description="The URL to associate with the file."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Adds the URL to the Hydrus file's known URL list.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@register(["add-url", "ass-url", "associate-url", "add_url"]) # aliases
|
||||||
|
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
from ._shared import parse_cmdlet_args
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
override_hash = parsed.get("hash")
|
||||||
|
url = parsed.get("url")
|
||||||
|
|
||||||
|
if not url:
|
||||||
|
log("Requires a URL argument")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
url = str(url).strip()
|
||||||
|
if not url:
|
||||||
|
log("Requires a non-empty URL")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
if not hash_hex:
|
||||||
|
log("Selected result does not include a Hydrus hash")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable")
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
client.associate_url(hash_hex, url)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus add-url failed: {exc}")
|
||||||
|
return 1
|
||||||
|
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||||
|
ctx.emit(f"Associated URL with {preview}: {url}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
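# Illustrative usage sketch (not part of the original commit); the URL is hypothetical:
#
#   @1 | add-url "https://example.com/source-page"
#   add-url -hash <sha256> "https://example.com/source-page"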
148
cmdlets/adjective.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import List, Dict, Any, Optional, Sequence
|
||||||
|
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||||
|
from helper.logger import log
|
||||||
|
from result_table import ResultTable
|
||||||
|
import pipeline as ctx
|
||||||
|
|
||||||
|
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "helper", "adjective.json")
|
||||||
|
|
||||||
|
def _load_adjectives() -> Dict[str, List[str]]:
|
||||||
|
try:
|
||||||
|
if os.path.exists(ADJECTIVE_FILE):
|
||||||
|
with open(ADJECTIVE_FILE, 'r', encoding='utf-8') as f:
|
||||||
|
return json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error loading adjectives: {e}", file=sys.stderr)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _save_adjectives(data: Dict[str, List[str]]) -> bool:
|
||||||
|
try:
|
||||||
|
with open(ADJECTIVE_FILE, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(data, f, indent=2)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error saving adjectives: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
data = _load_adjectives()
|
||||||
|
|
||||||
|
# Parse arguments manually first to handle positional args
|
||||||
|
# We expect: .adjective [category] [tag] [-add] [-delete]
|
||||||
|
|
||||||
|
# If no args, list categories
|
||||||
|
if not args:
|
||||||
|
table = ResultTable("Adjective Categories")
|
||||||
|
for i, (category, tags) in enumerate(data.items()):
|
||||||
|
row = table.add_row()
|
||||||
|
row.add_column("#", str(i + 1))
|
||||||
|
row.add_column("Category", category)
|
||||||
|
row.add_column("Tag Amount", str(len(tags)))
|
||||||
|
|
||||||
|
# Selection expands to: .adjective "Category Name"
|
||||||
|
table.set_row_selection_args(i, [category])
|
||||||
|
|
||||||
|
table.set_source_command(".adjective")
|
||||||
|
ctx.set_last_result_table_overlay(table, list(data.keys()))
|
||||||
|
ctx.set_current_stage_table(table)
|
||||||
|
print(table)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# We have args. First arg is likely category.
|
||||||
|
category = args[0]
|
||||||
|
|
||||||
|
# Check if we are adding a new category (implicit if it doesn't exist)
|
||||||
|
if category not in data:
|
||||||
|
# If only category provided, create it
|
||||||
|
if len(args) == 1:
|
||||||
|
data[category] = []
|
||||||
|
_save_adjectives(data)
|
||||||
|
log(f"Created new category: {category}")
|
||||||
|
# If more args, we might be trying to add to a non-existent category
|
||||||
|
elif "-add" in args:
|
||||||
|
data[category] = []
|
||||||
|
# Continue to add logic
|
||||||
|
|
||||||
|
# Handle operations within category
|
||||||
|
remaining_args = list(args[1:])
|
||||||
|
|
||||||
|
# Check for -add flag
|
||||||
|
if "-add" in remaining_args:
|
||||||
|
# .adjective category -add tag
|
||||||
|
# or .adjective category tag -add
|
||||||
|
add_idx = remaining_args.index("-add")
|
||||||
|
# Tag could be before or after
|
||||||
|
tag = None
|
||||||
|
if add_idx + 1 < len(remaining_args):
|
||||||
|
tag = remaining_args[add_idx + 1]
|
||||||
|
elif add_idx > 0:
|
||||||
|
tag = remaining_args[add_idx - 1]
|
||||||
|
|
||||||
|
if tag:
|
||||||
|
if tag not in data[category]:
|
||||||
|
data[category].append(tag)
|
||||||
|
_save_adjectives(data)
|
||||||
|
log(f"Added '{tag}' to '{category}'")
|
||||||
|
else:
|
||||||
|
log(f"Tag '{tag}' already exists in '{category}'")
|
||||||
|
else:
|
||||||
|
log("Error: No tag specified to add")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Check for -delete flag
|
||||||
|
elif "-delete" in remaining_args:
|
||||||
|
# .adjective category -delete tag
|
||||||
|
# or .adjective category tag -delete
|
||||||
|
del_idx = remaining_args.index("-delete")
|
||||||
|
tag = None
|
||||||
|
if del_idx + 1 < len(remaining_args):
|
||||||
|
tag = remaining_args[del_idx + 1]
|
||||||
|
elif del_idx > 0:
|
||||||
|
tag = remaining_args[del_idx - 1]
|
||||||
|
|
||||||
|
if tag:
|
||||||
|
if tag in data[category]:
|
||||||
|
data[category].remove(tag)
|
||||||
|
_save_adjectives(data)
|
||||||
|
log(f"Deleted '{tag}' from '{category}'")
|
||||||
|
else:
|
||||||
|
log(f"Tag '{tag}' not found in '{category}'")
|
||||||
|
else:
|
||||||
|
log("Error: No tag specified to delete")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# List tags in category (Default action if no flags or after modification)
|
||||||
|
tags = data.get(category, [])
|
||||||
|
table = ResultTable(f"Tags in '{category}'")
|
||||||
|
for i, tag in enumerate(tags):
|
||||||
|
row = table.add_row()
|
||||||
|
row.add_column("#", str(i + 1))
|
||||||
|
row.add_column("Tag", tag)
|
||||||
|
|
||||||
|
# Selection expands to: .adjective "Category" "Tag"
|
||||||
|
# This allows typing @N -delete to delete it
|
||||||
|
table.set_row_selection_args(i, [category, tag])
|
||||||
|
|
||||||
|
table.set_source_command(".adjective")
|
||||||
|
ctx.set_last_result_table_overlay(table, tags)
|
||||||
|
ctx.set_current_stage_table(table)
|
||||||
|
print(table)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name=".adjective",
|
||||||
|
aliases=["adj"],
|
||||||
|
summary="Manage adjective categories and tags",
|
||||||
|
usage=".adjective [category] [-add tag] [-delete tag]",
|
||||||
|
args=[
|
||||||
|
CmdletArg(name="category", type="string", description="Category name", required=False),
|
||||||
|
CmdletArg(name="tag", type="string", description="Tag name", required=False),
|
||||||
|
CmdletArg(name="add", type="flag", description="Add tag"),
|
||||||
|
CmdletArg(name="delete", type="flag", description="Delete tag"),
|
||||||
|
],
|
||||||
|
exec=_run
|
||||||
|
)
|
||||||
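# Illustrative usage sketch (not part of the original commit); the category and tag
# names are hypothetical:
#
#   .adjective                              # list all categories
#   .adjective philosophy                   # list tags in the 'philosophy' category
#   .adjective philosophy -add stoicism
#   .adjective philosophy stoicism -delete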
153
cmdlets/check_file_status.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="check-file-status",
|
||||||
|
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
|
||||||
|
usage="check-file-status [-hash <sha256>]",
|
||||||
|
args=[
|
||||||
|
CmdletArg("-hash", description="File hash (SHA256) to check. If not provided, uses selected result."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Shows whether file is active in Hydrus or marked as deleted",
|
||||||
|
"- Detects corrupted data (e.g., comma-separated URLs)",
|
||||||
|
"- Displays file metadata and service locations",
|
||||||
|
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@register(["check-file-status", "check-status", "file-status", "status"])
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
override_hash: str | None = None
|
||||||
|
i = 0
|
||||||
|
while i < len(args):
|
||||||
|
token = args[i]
|
||||||
|
low = str(token).lower()
|
||||||
|
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||||
|
override_hash = str(args[i + 1]).strip()
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
|
||||||
|
if not hash_hex:
|
||||||
|
log("No hash provided and no result selected", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
result_data = client.fetch_file_metadata(hashes=[hash_hex])
|
||||||
|
if not result_data.get("metadata"):
|
||||||
|
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
file_info = result_data["metadata"][0]
|
||||||
|
|
||||||
|
# Status summary
|
||||||
|
is_deleted = file_info.get("is_deleted", False)
|
||||||
|
is_local = file_info.get("is_local", False)
|
||||||
|
is_trashed = file_info.get("is_trashed", False)
|
||||||
|
|
||||||
|
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
|
||||||
|
log(f"File status: {status_str}", file=sys.stderr)
|
||||||
|
|
||||||
|
# File info
|
||||||
|
log(f"\n📄 File Information:", file=sys.stderr)
|
||||||
|
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
|
||||||
|
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
|
||||||
|
log(f" MIME: {file_info['mime']}", file=sys.stderr)
|
||||||
|
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Service status
|
||||||
|
file_services = file_info.get("file_services", {})
|
||||||
|
current_services = file_services.get("current", {})
|
||||||
|
deleted_services = file_services.get("deleted", {})
|
||||||
|
|
||||||
|
if current_services:
|
||||||
|
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
|
||||||
|
for service_key, service_info in current_services.items():
|
||||||
|
sname = service_info.get("name", "unknown")
|
||||||
|
stype = service_info.get("type_pretty", "unknown")
|
||||||
|
log(f" - {sname} ({stype})", file=sys.stderr)
|
||||||
|
|
||||||
|
if deleted_services:
|
||||||
|
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
|
||||||
|
for service_key, service_info in deleted_services.items():
|
||||||
|
sname = service_info.get("name", "unknown")
|
||||||
|
stype = service_info.get("type_pretty", "unknown")
|
||||||
|
time_deleted = service_info.get("time_deleted", "?")
|
||||||
|
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
|
||||||
|
|
||||||
|
# URL check
|
||||||
|
urls = file_info.get("known_urls", [])
|
||||||
|
log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)
|
||||||
|
|
||||||
|
corrupted_count = 0
|
||||||
|
for i, url in enumerate(urls, 1):
|
||||||
|
if "," in url:
|
||||||
|
corrupted_count += 1
|
||||||
|
log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f" [{i}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
|
||||||
|
|
||||||
|
if corrupted_count > 0:
|
||||||
|
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
|
||||||
|
|
||||||
|
# Tags
|
||||||
|
tags_dict = file_info.get("tags", {})
|
||||||
|
total_tags = 0
|
||||||
|
for service_key, service_data in tags_dict.items():
|
||||||
|
service_name = service_data.get("name", "unknown")
|
||||||
|
display_tags = service_data.get("display_tags", {}).get("0", [])
|
||||||
|
total_tags += len(display_tags)
|
||||||
|
|
||||||
|
if total_tags > 0:
|
||||||
|
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
|
||||||
|
for service_key, service_data in tags_dict.items():
|
||||||
|
display_tags = service_data.get("display_tags", {}).get("0", [])
|
||||||
|
if display_tags:
|
||||||
|
service_name = service_data.get("name", "unknown")
|
||||||
|
log(f" {service_name}:", file=sys.stderr)
|
||||||
|
for tag in display_tags[:5]: # Show first 5
|
||||||
|
log(f" - {tag}", file=sys.stderr)
|
||||||
|
if len(display_tags) > 5:
|
||||||
|
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
|
||||||
|
|
||||||
|
log("\n", file=sys.stderr)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Error checking file status: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
|
return 1
|
||||||
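# Illustrative usage sketch (not part of the original commit); the hash placeholder is hypothetical:
#
#   @1 | check-file-status
#   check-file-status -hash <sha256>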
110
cmdlets/cleanup.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
|
||||||
|
|
||||||
|
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
|
||||||
|
then emits the remaining non-temporary results for further pipeline stages.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp
|
||||||
|
import models
|
||||||
|
import pipeline as pipeline_context
|
||||||
|
|
||||||
|
|
||||||
|
@register(["cleanup"])
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Remove temporary files from pipeline results.
|
||||||
|
|
||||||
|
Accepts:
|
||||||
|
- Single result object with is_temp field
|
||||||
|
- List of result objects to clean up
|
||||||
|
|
||||||
|
Process:
|
||||||
|
- Filters results by is_temp=True
|
||||||
|
- Deletes those files from disk
|
||||||
|
- Emits only non-temporary results
|
||||||
|
|
||||||
|
Typical pipeline usage:
|
||||||
|
download-data url | screen-shot | add-tag "tag" --all | cleanup
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
import json
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Normalize input to list
|
||||||
|
results = normalize_result_input(result)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
log("[cleanup] No results to process", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Separate temporary and permanent results
|
||||||
|
temp_results = filter_results_by_temp(results, include_temp=True)
|
||||||
|
perm_results = filter_results_by_temp(results, include_temp=False)
|
||||||
|
|
||||||
|
# Delete temporary files
|
||||||
|
deleted_count = 0
|
||||||
|
for temp_result in temp_results:
|
||||||
|
try:
|
||||||
|
file_path = get_pipe_object_path(temp_result)
|
||||||
|
|
||||||
|
if file_path:
|
||||||
|
path_obj = Path(file_path)
|
||||||
|
if path_obj.exists():
|
||||||
|
# Delete the file
|
||||||
|
path_obj.unlink()
|
||||||
|
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
|
||||||
|
deleted_count += 1
|
||||||
|
|
||||||
|
# Clean up any associated sidecar files
|
||||||
|
for ext in ['.tags', '.metadata']:
|
||||||
|
sidecar = path_obj.parent / (path_obj.name + ext)
|
||||||
|
if sidecar.exists():
|
||||||
|
try:
|
||||||
|
sidecar.unlink()
|
||||||
|
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Log summary
|
||||||
|
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
|
||||||
|
|
||||||
|
# Emit permanent results for downstream processing
|
||||||
|
for perm_result in perm_results:
|
||||||
|
pipeline_context.emit(perm_result)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="cleanup",
|
||||||
|
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
|
||||||
|
usage="cleanup",
|
||||||
|
args=[],
|
||||||
|
details=[
|
||||||
|
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
|
||||||
|
"- Deletes files marked with is_temp=True from disk",
|
||||||
|
"- Also cleans up associated sidecar files (.tags, .metadata)",
|
||||||
|
"- Emits only non-temporary results for further processing",
|
||||||
|
"- Typical usage at end of pipeline: ... | add-tag \"tag\" --all | cleanup",
|
||||||
|
"- Exit code 0 if cleanup successful, 1 if no results to process",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
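To make the temp/permanent split concrete, here is a small, self-contained sketch of the is_temp contract the cleanup cmdlet relies on. It uses plain dicts instead of the project's normalize_result_input()/filter_results_by_temp() helpers, so the paths and field values are illustrative only:

    # illustration only; the real cmdlet goes through pipeline.filter_results_by_temp()
    results = [
        {"path": "/tmp/page-screenshot.png", "is_temp": True},
        {"path": "/library/video.mp4", "is_temp": False},
    ]
    temp = [r for r in results if r.get("is_temp")]
    perm = [r for r in results if not r.get("is_temp")]
    # temp entries would be unlinked from disk; only perm is emitted downstream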
242 cmdlets/delete_file.py Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_database_entry(db_path: Path, file_path: str) -> bool:
|
||||||
|
"""Delete file and related entries from local library database.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_path: Path to the library.db file
|
||||||
|
file_path: Exact file path string as stored in database
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if successful, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if not db_path.exists():
|
||||||
|
log(f"Database not found at {db_path}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
log(f"Searching database for file_path: {file_path}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Find the file_id using the exact file_path
|
||||||
|
cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,))
|
||||||
|
result = cursor.fetchone()
|
||||||
|
|
||||||
|
if not result:
|
||||||
|
log(f"ERROR: File path not found in database", file=sys.stderr)
|
||||||
|
log(f"Expected: {file_path}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Debug: show sample entries
|
||||||
|
cursor.execute('SELECT id, file_path FROM files LIMIT 3')
|
||||||
|
samples = cursor.fetchall()
|
||||||
|
if samples:
|
||||||
|
log(f"Sample DB entries:", file=sys.stderr)
|
||||||
|
for fid, fpath in samples:
|
||||||
|
log(f"{fid}: {fpath}", file=sys.stderr)
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
return False
|
||||||
|
|
||||||
|
file_id = result[0]
|
||||||
|
log(f"Found file_id={file_id}, deleting all related records", file=sys.stderr)
|
||||||
|
|
||||||
|
# Delete related records
|
||||||
|
cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,))
|
||||||
|
meta_count = cursor.rowcount
|
||||||
|
|
||||||
|
cursor.execute('DELETE FROM tags WHERE file_id = ?', (file_id,))
|
||||||
|
tags_count = cursor.rowcount
|
||||||
|
|
||||||
|
cursor.execute('DELETE FROM notes WHERE file_id = ?', (file_id,))
|
||||||
|
notes_count = cursor.rowcount
|
||||||
|
|
||||||
|
cursor.execute('DELETE FROM files WHERE id = ?', (file_id,))
|
||||||
|
files_count = cursor.rowcount
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
log(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr)
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Database cleanup failed: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
# Parse overrides and options
|
||||||
|
override_hash: str | None = None
|
||||||
|
conserve: str | None = None
|
||||||
|
lib_root: str | None = None
|
||||||
|
reason_tokens: list[str] = []
|
||||||
|
i = 0
|
||||||
|
while i < len(args):
|
||||||
|
token = args[i]
|
||||||
|
low = str(token).lower()
|
||||||
|
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||||
|
override_hash = str(args[i + 1]).strip()
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
|
||||||
|
value = str(args[i + 1]).strip().lower()
|
||||||
|
if value in {"local", "hydrus"}:
|
||||||
|
conserve = value
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
|
||||||
|
lib_root = str(args[i + 1]).strip()
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
reason_tokens.append(token)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Handle result as either dict or object
|
||||||
|
if isinstance(result, dict):
|
||||||
|
hash_hex_raw = result.get("hash_hex") or result.get("hash")
|
||||||
|
target = result.get("target")
|
||||||
|
origin = result.get("origin")
|
||||||
|
else:
|
||||||
|
hash_hex_raw = getattr(result, "hash_hex", None) or getattr(result, "hash", None)
|
||||||
|
target = getattr(result, "target", None)
|
||||||
|
origin = getattr(result, "origin", None)
|
||||||
|
|
||||||
|
# For Hydrus files, the target IS the hash
|
||||||
|
if origin and origin.lower() == "hydrus" and not hash_hex_raw:
|
||||||
|
hash_hex_raw = target
|
||||||
|
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
|
||||||
|
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
|
||||||
|
|
||||||
|
local_deleted = False
|
||||||
|
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
|
||||||
|
if conserve != "local" and local_target:
|
||||||
|
path = Path(str(target))
|
||||||
|
file_path_str = str(target) # Keep the original string for DB matching
|
||||||
|
try:
|
||||||
|
if path.exists() and path.is_file():
|
||||||
|
path.unlink()
|
||||||
|
local_deleted = True
|
||||||
|
if ctx._PIPE_ACTIVE:
|
||||||
|
ctx.emit(f"Removed local file: {path}")
|
||||||
|
log(f"Deleted: {path.name}", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Local delete failed: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Remove common sidecars regardless of file removal success
|
||||||
|
for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"),
|
||||||
|
path.with_suffix(".metadata"), path.with_suffix(".notes")):
|
||||||
|
try:
|
||||||
|
if sidecar.exists() and sidecar.is_file():
|
||||||
|
sidecar.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clean up database entry if library root provided - do this regardless of file deletion success
|
||||||
|
if lib_root:
|
||||||
|
lib_root_path = Path(lib_root)
|
||||||
|
db_path = lib_root_path / ".downlow_library.db"
|
||||||
|
log(f"Attempting DB cleanup: lib_root={lib_root}, db_path={db_path}", file=sys.stderr)
|
||||||
|
log(f"Deleting DB entry for: {file_path_str}", file=sys.stderr)
|
||||||
|
if _delete_database_entry(db_path, file_path_str):
|
||||||
|
if ctx._PIPE_ACTIVE:
|
||||||
|
ctx.emit(f"Removed database entry: {path.name}")
|
||||||
|
log(f"Database entry cleaned up", file=sys.stderr)
|
||||||
|
local_deleted = True # Mark as deleted if DB cleanup succeeded
|
||||||
|
else:
|
||||||
|
log(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"No lib_root provided, skipping database cleanup", file=sys.stderr)
|
||||||
|
|
||||||
|
hydrus_deleted = False
|
||||||
|
if conserve != "hydrus" and hash_hex:
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
if not local_deleted:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
if client is None:
|
||||||
|
if not local_deleted:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
payload: Dict[str, Any] = {"hashes": [hash_hex]}
|
||||||
|
if reason:
|
||||||
|
payload["reason"] = reason
|
||||||
|
try:
|
||||||
|
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
|
||||||
|
hydrus_deleted = True
|
||||||
|
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||||
|
log(f"Deleted from Hydrus: {preview}…", file=sys.stderr)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus delete failed: {exc}", file=sys.stderr)
|
||||||
|
if not local_deleted:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if hydrus_deleted and hash_hex:
|
||||||
|
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||||
|
if ctx._PIPE_ACTIVE:
|
||||||
|
if reason:
|
||||||
|
ctx.emit(f"Deleted {preview} (reason: {reason}).")
|
||||||
|
else:
|
||||||
|
ctx.emit(f"Deleted {preview}.")
|
||||||
|
|
||||||
|
if hydrus_deleted or local_deleted:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
log("Selected result has neither Hydrus hash nor local file target")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="delete-file",
|
||||||
|
summary="Delete a file locally and/or from Hydrus, including database entries.",
|
||||||
|
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
|
||||||
|
aliases=["del-file"],
|
||||||
|
args=[
|
||||||
|
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
|
||||||
|
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
|
||||||
|
CmdletArg("reason", description="Optional reason for deletion (free text)."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"Default removes both the local file and Hydrus file.",
|
||||||
|
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
|
||||||
|
"Database entries are automatically cleaned up for local files.",
|
||||||
|
"Any remaining arguments are treated as the Hydrus reason text.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
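The _delete_database_entry() helper above only assumes four tables joined through a files.id key. A minimal schema consistent with the statements it issues might look like the sketch below; the real .downlow_library.db layout is not shown in this commit and may contain more columns:

    import sqlite3

    # hypothetical minimal schema implied by the DELETE statements in _delete_database_entry()
    conn = sqlite3.connect(":memory:")
    conn.executescript("""
        CREATE TABLE files    (id INTEGER PRIMARY KEY, file_path TEXT UNIQUE);
        CREATE TABLE metadata (file_id INTEGER, key TEXT, value TEXT);
        CREATE TABLE tags     (file_id INTEGER, tag TEXT);
        CREATE TABLE notes    (file_id INTEGER, name TEXT, body TEXT);
    """)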
79 cmdlets/delete_note.py Normal file
@@ -0,0 +1,79 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="delete-note",
    summary="Delete a named note from a Hydrus file.",
    usage="i | del-note [-hash <sha256>] <name>",
    aliases=["del-note"],
    args=[
    ],
    details=[
        "- Removes the note with the given name from the Hydrus file.",
    ],
)


def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    if not args:
        log("Requires the note name/key to delete")
        return 1
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    if not rest:
        log("Requires the note name/key to delete")
        return 1
    name = str(rest[0] or '').strip()
    if not name:
        log("Requires a non-empty note name/key")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        service_name = "my notes"
        client.delete_notes(hash_hex, [name], service_name)
    except Exception as exc:
        log(f"Hydrus delete-note failed: {exc}")
        return 1
    log(f"Deleted note '{name}'")
    return 0
219 cmdlets/delete_tag.py Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence
|
||||||
|
import json
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="delete-tags",
|
||||||
|
summary="Remove tags from a Hydrus file.",
|
||||||
|
usage="del-tags [-hash <sha256>] <tag>[,<tag>...]",
|
||||||
|
aliases=["del-tag", "del-tags", "delete-tag"],
|
||||||
|
args=[
|
||||||
|
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Requires a Hydrus file (hash present) or explicit -hash override.",
|
||||||
|
"- Multiple tags can be comma-separated or space-separated.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
@register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
|
||||||
|
has_piped_tag = (result and hasattr(result, '__class__') and
|
||||||
|
result.__class__.__name__ == 'TagItem' and
|
||||||
|
hasattr(result, 'tag_name'))
|
||||||
|
|
||||||
|
# Check if we have a piped list of TagItems (from @N selection)
|
||||||
|
has_piped_tag_list = (isinstance(result, list) and result and
|
||||||
|
hasattr(result[0], '__class__') and
|
||||||
|
result[0].__class__.__name__ == 'TagItem')
|
||||||
|
|
||||||
|
if not args and not has_piped_tag and not has_piped_tag_list:
|
||||||
|
log("Requires at least one tag argument")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Parse -hash override and collect tags from remaining args
|
||||||
|
override_hash: str | None = None
|
||||||
|
rest: list[str] = []
|
||||||
|
i = 0
|
||||||
|
while i < len(args):
|
||||||
|
a = args[i]
|
||||||
|
low = str(a).lower()
|
||||||
|
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||||
|
override_hash = str(args[i + 1]).strip()
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
rest.append(a)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Check if first argument is @ syntax (result table selection)
|
||||||
|
# @5 or @{2,5,8} to delete tags from ResultTable by index
|
||||||
|
tags_from_at_syntax = []
|
||||||
|
hash_from_at_syntax = None
|
||||||
|
|
||||||
|
if rest and str(rest[0]).startswith("@"):
|
||||||
|
selector_arg = str(rest[0])
|
||||||
|
pipe_selector = selector_arg[1:].strip()
|
||||||
|
# Parse @N or @{N,M,K} syntax
|
||||||
|
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
|
||||||
|
# @{2,5,8}
|
||||||
|
pipe_selector = pipe_selector[1:-1]
|
||||||
|
try:
|
||||||
|
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
|
||||||
|
except ValueError:
|
||||||
|
log("Invalid selection syntax. Use @2 or @{2,5,8}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Get the last ResultTable from pipeline context
|
||||||
|
try:
|
||||||
|
last_table = ctx._LAST_RESULT_TABLE
|
||||||
|
if last_table:
|
||||||
|
# Extract tags from selected rows
|
||||||
|
for idx in indices:
|
||||||
|
if 1 <= idx <= len(last_table.rows):
|
||||||
|
# Look for a TagItem in _LAST_RESULT_ITEMS by index
|
||||||
|
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
|
||||||
|
item = ctx._LAST_RESULT_ITEMS[idx - 1]
|
||||||
|
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
|
||||||
|
tag_name = getattr(item, 'tag_name', None)
|
||||||
|
if tag_name:
|
||||||
|
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
|
||||||
|
tags_from_at_syntax.append(tag_name)
|
||||||
|
# Also get hash from first item for consistency
|
||||||
|
if not hash_from_at_syntax:
|
||||||
|
hash_from_at_syntax = getattr(item, 'hash_hex', None)
|
||||||
|
|
||||||
|
if not tags_from_at_syntax:
|
||||||
|
log(f"No tags found at indices: {indices}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
log("No ResultTable in pipeline (use @ after running get-tag)")
|
||||||
|
return 1
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
# If we have a list of TagItems, we want to process ALL of them if no args provided
|
||||||
|
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
|
||||||
|
if not args and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
|
||||||
|
# We will extract tags from the list later
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
# Determine tags and hash to use
|
||||||
|
tags: list[str] = []
|
||||||
|
hash_hex = None
|
||||||
|
|
||||||
|
if tags_from_at_syntax:
|
||||||
|
# Use tags extracted from @ syntax
|
||||||
|
tags = tags_from_at_syntax
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
|
||||||
|
log(f"[delete_tag] Using @ syntax extraction: {len(tags)} tag(s) to delete: {tags}")
|
||||||
|
elif isinstance(result, list) and result and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
|
||||||
|
# Got a list of TagItems (e.g. from delete-tag @1)
|
||||||
|
tags = [getattr(item, 'tag_name') for item in result if getattr(item, 'tag_name', None)]
|
||||||
|
# Use hash from first item
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result[0], "hash_hex", None))
|
||||||
|
elif result and hasattr(result, '__class__') and result.__class__.__name__ == 'TagItem':
|
||||||
|
# Got a piped TagItem - delete this specific tag
|
||||||
|
tag_name = getattr(result, 'tag_name', None)
|
||||||
|
if tag_name:
|
||||||
|
tags = [tag_name]
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
else:
|
||||||
|
# Traditional mode - parse tag arguments
|
||||||
|
tags = parse_tag_arguments(rest)
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
|
||||||
|
if not tags:
|
||||||
|
log("No valid tags were provided")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if not hash_hex:
|
||||||
|
log("Selected result does not include a hash")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
service_name = hydrus_wrapper.get_tag_service_name(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Failed to resolve tag service: {exc}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"[delete_tag] Sending deletion request: hash={hash_hex}, tags={tags}, service={service_name}")
|
||||||
|
try:
|
||||||
|
result = client.delete_tags(hash_hex, tags, service_name)
|
||||||
|
log(f"[delete_tag] Hydrus response: {result}")
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus del-tag failed: {exc}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||||
|
log(f"Removed {len(tags)} tag(s) from {preview} via '{service_name}'.")
|
||||||
|
|
||||||
|
# Re-fetch and emit updated tags after deletion
|
||||||
|
try:
|
||||||
|
payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False)
|
||||||
|
items = payload.get("metadata") if isinstance(payload, dict) else None
|
||||||
|
if isinstance(items, list) and items:
|
||||||
|
meta = items[0] if isinstance(items[0], dict) else None
|
||||||
|
if isinstance(meta, dict):
|
||||||
|
# Extract tags from updated metadata
|
||||||
|
from cmdlets.get_tag import _extract_my_tags_from_hydrus_meta, TagItem
|
||||||
|
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
|
||||||
|
updated_tags = _extract_my_tags_from_hydrus_meta(meta, service_key, service_name)
|
||||||
|
|
||||||
|
# Emit updated tags as TagItem objects
|
||||||
|
from result_table import ResultTable
|
||||||
|
table = ResultTable("Tags", max_columns=2)
|
||||||
|
tag_items = []
|
||||||
|
for idx, tag_name in enumerate(updated_tags, start=1):
|
||||||
|
tag_item = TagItem(
|
||||||
|
tag_name=tag_name,
|
||||||
|
tag_index=idx,
|
||||||
|
hash_hex=hash_hex,
|
||||||
|
source="hydrus",
|
||||||
|
service_name=service_name,
|
||||||
|
)
|
||||||
|
tag_items.append(tag_item)
|
||||||
|
table.add_result(tag_item)
|
||||||
|
ctx.emit(tag_item)
|
||||||
|
|
||||||
|
# Store items for @ selection in next command (CLI will handle table management)
|
||||||
|
# Don't call set_last_result_table so we don't pollute history or table context
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Warning: Could not fetch updated tags after deletion: {exc}", file=__import__('sys').stderr)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
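A short usage illustration of the @ selection flow delete-tags implements above; the row numbers refer to whatever the preceding get-tag ResultTable printed, and the exact tags shown are of course file-specific:

    get-tag            # prints a numbered ResultTable of tags for the selected file
    delete-tag @2      # removes the tag shown on row 2
    delete-tag @{2,5}  # removes the tags shown on rows 2 and 5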
82 cmdlets/delete_url.py Normal file
@@ -0,0 +1,82 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="delete-url",
    summary="Remove a URL association from a Hydrus file.",
    usage="delete-url [-hash <sha256>] <url>",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
        CmdletArg("<url>", required=True, description="The URL to remove from the file."),
    ],
    details=[
        "- Removes the URL from the Hydrus file's known URL list.",
    ],
)


def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    return override_hash, rest


@register(["del-url", "delete-url", "delete_url"])  # aliases
def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    override_hash, rest = _parse_hash_and_rest(args)
    if not rest:
        log("Requires a URL argument")
        return 1
    url = str(rest[0] or '').strip()
    if not url:
        log("Requires a non-empty URL")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        client.delete_url(hash_hex, url)
    except Exception as exc:
        log(f"Hydrus del-url failed: {exc}")
        return 1
    log(f"Deleted URL: {url}")
    return 0
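Several of these cmdlets repeat the same hand-rolled -hash parsing loop. A quick sketch of what the _parse_hash_and_rest() defined above returns for a typical argument vector; the hash and URL values are placeholders:

    # illustration of the -hash parsing defined in delete_url.py above
    override_hash, rest = _parse_hash_and_rest(["-hash", "abc123...", "https://example.com/post/1"])
    # -> override_hash == "abc123...", rest == ["https://example.com/post/1"]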
2633 cmdlets/download_data.py Normal file (diff suppressed because it is too large)
1618 cmdlets/get_file.py Normal file (diff suppressed because it is too large)
246 cmdlets/get_metadata.py Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence, Optional
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
from pathlib import Path
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||||
|
|
||||||
|
|
||||||
|
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||||
|
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Helper to get field from both dict and object
|
||||||
|
def get_field(obj: Any, field: str, default: Any = None) -> Any:
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return obj.get(field, default)
|
||||||
|
else:
|
||||||
|
return getattr(obj, field, default)
|
||||||
|
|
||||||
|
# Parse -hash override
|
||||||
|
override_hash: str | None = None
|
||||||
|
args_list = list(_args)
|
||||||
|
i = 0
|
||||||
|
while i < len(args_list):
|
||||||
|
a = args_list[i]
|
||||||
|
low = str(a).lower()
|
||||||
|
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
|
||||||
|
override_hash = str(args_list[i + 1]).strip()
|
||||||
|
break
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Try to determine if this is a local file or Hydrus file
|
||||||
|
local_path = get_field(result, "target", None) or get_field(result, "path", None)
|
||||||
|
is_local = False
|
||||||
|
if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")):
|
||||||
|
is_local = True
|
||||||
|
|
||||||
|
# LOCAL FILE PATH
|
||||||
|
if is_local and local_path:
|
||||||
|
try:
|
||||||
|
file_path = Path(str(local_path))
|
||||||
|
if file_path.exists() and file_path.is_file():
|
||||||
|
# Get the hash from result or compute it
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
|
||||||
|
|
||||||
|
# If no hash, compute SHA256 of the file
|
||||||
|
if not hash_hex:
|
||||||
|
try:
|
||||||
|
import hashlib
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
hash_hex = hashlib.sha256(f.read()).hexdigest()
|
||||||
|
except Exception:
|
||||||
|
hash_hex = None
|
||||||
|
|
||||||
|
# Get MIME type
|
||||||
|
mime_type, _ = mimetypes.guess_type(str(file_path))
|
||||||
|
if not mime_type:
|
||||||
|
mime_type = "unknown"
|
||||||
|
|
||||||
|
# Get file size
|
||||||
|
try:
|
||||||
|
file_size = file_path.stat().st_size
|
||||||
|
except Exception:
|
||||||
|
file_size = None
|
||||||
|
|
||||||
|
# Try to get duration if it's a media file
|
||||||
|
duration_seconds = None
|
||||||
|
try:
|
||||||
|
# Try to use ffprobe if available
|
||||||
|
import subprocess
|
||||||
|
result_proc = subprocess.run(
|
||||||
|
["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=5
|
||||||
|
)
|
||||||
|
if result_proc.returncode == 0 and result_proc.stdout.strip():
|
||||||
|
try:
|
||||||
|
duration_seconds = float(result_proc.stdout.strip())
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Get format helpers from search module
|
||||||
|
try:
|
||||||
|
from .search_file import _format_size as _fmt_size
|
||||||
|
from .search_file import _format_duration as _fmt_dur
|
||||||
|
except Exception:
|
||||||
|
_fmt_size = lambda x: str(x) if x is not None else ""
|
||||||
|
_fmt_dur = lambda x: str(x) if x is not None else ""
|
||||||
|
|
||||||
|
size_label = _fmt_size(file_size) if file_size is not None else ""
|
||||||
|
dur_label = _fmt_dur(duration_seconds) if duration_seconds is not None else ""
|
||||||
|
|
||||||
|
# Get known URLs from sidecar or result
|
||||||
|
urls = []
|
||||||
|
sidecar_path = Path(str(file_path) + '.tags')
|
||||||
|
if sidecar_path.exists():
|
||||||
|
try:
|
||||||
|
with open(sidecar_path, 'r', encoding='utf-8') as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith('known_url:'):
|
||||||
|
url_value = line.replace('known_url:', '', 1).strip()
|
||||||
|
if url_value:
|
||||||
|
urls.append(url_value)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback to result URLs if not in sidecar
|
||||||
|
if not urls:
|
||||||
|
urls_from_result = get_field(result, "known_urls", None) or get_field(result, "urls", None)
|
||||||
|
if isinstance(urls_from_result, list):
|
||||||
|
urls.extend([str(u).strip() for u in urls_from_result if u])
|
||||||
|
|
||||||
|
# Display local file metadata
|
||||||
|
log(f"PATH: {file_path}")
|
||||||
|
if hash_hex:
|
||||||
|
log(f"HASH: {hash_hex}")
|
||||||
|
if mime_type:
|
||||||
|
log(f"MIME: {mime_type}")
|
||||||
|
if size_label:
|
||||||
|
log(f"Size: {size_label}")
|
||||||
|
if dur_label:
|
||||||
|
log(f"Duration: {dur_label}")
|
||||||
|
if urls:
|
||||||
|
log("URLs:")
|
||||||
|
for url in urls:
|
||||||
|
log(f" {url}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except Exception as exc:
|
||||||
|
# Fall through to Hydrus if local file handling fails
|
||||||
|
pass
|
||||||
|
|
||||||
|
# HYDRUS PATH
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
|
||||||
|
if not hash_hex:
|
||||||
|
log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
hashes=[hash_hex],
|
||||||
|
include_service_keys_to_tags=False,
|
||||||
|
include_file_urls=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
items = payload.get("metadata") if isinstance(payload, dict) else None
|
||||||
|
if not isinstance(items, list) or not items:
|
||||||
|
log("No metadata found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
meta = items[0] if isinstance(items[0], dict) else None
|
||||||
|
if not isinstance(meta, dict):
|
||||||
|
log("No metadata found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
mime = meta.get("mime")
|
||||||
|
size = meta.get("size") or meta.get("file_size")
|
||||||
|
duration_value = meta.get("duration")
|
||||||
|
inner = meta.get("metadata") if isinstance(meta.get("metadata"), dict) else None
|
||||||
|
if duration_value is None and isinstance(inner, dict):
|
||||||
|
duration_value = inner.get("duration")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from .search_file import _format_size as _fmt_size
|
||||||
|
from .search_file import _format_duration as _fmt_dur
|
||||||
|
from .search_file import _hydrus_duration_seconds as _dur_secs
|
||||||
|
except Exception:
|
||||||
|
_fmt_size = lambda x: str(x) if x is not None else ""
|
||||||
|
_dur_secs = lambda x: x
|
||||||
|
_fmt_dur = lambda x: str(x) if x is not None else ""
|
||||||
|
|
||||||
|
dur_seconds = _dur_secs(duration_value)
|
||||||
|
dur_label = _fmt_dur(dur_seconds) if dur_seconds is not None else ""
|
||||||
|
size_label = _fmt_size(size)
|
||||||
|
|
||||||
|
# Display Hydrus file metadata
|
||||||
|
log(f"PATH: hydrus://file/{hash_hex}")
|
||||||
|
log(f"Hash: {hash_hex}")
|
||||||
|
if mime:
|
||||||
|
log(f"MIME: {mime}")
|
||||||
|
if dur_label:
|
||||||
|
log(f"Duration: {dur_label}")
|
||||||
|
if size_label:
|
||||||
|
log(f"Size: {size_label}")
|
||||||
|
|
||||||
|
urls = meta.get("known_urls") or meta.get("urls")
|
||||||
|
if isinstance(urls, list) and urls:
|
||||||
|
log("URLs:")
|
||||||
|
for url in urls:
|
||||||
|
try:
|
||||||
|
text = str(url).strip()
|
||||||
|
except Exception:
|
||||||
|
text = ""
|
||||||
|
if text:
|
||||||
|
log(f" {text}")
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="get-metadata",
|
||||||
|
summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
|
||||||
|
usage="get-metadata [-hash <sha256>]",
|
||||||
|
aliases=["meta"],
|
||||||
|
args=[
|
||||||
|
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
|
||||||
|
"- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
|
||||||
|
"- Automatically detects local vs Hydrus files.",
|
||||||
|
"- Local file hashes are computed via SHA256 if not already available.",
|
||||||
|
],
|
||||||
|
)
|
||||||
87 cmdlets/get_note.py Normal file
@@ -0,0 +1,87 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="get-note",
    summary="List notes on a Hydrus file.",
    usage="get-note [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Prints notes by service and note name.",
    ],
)


@register(["get-note", "get-notes", "get_note"])  # aliases
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Helper to get field from both dict and object
    def get_field(obj: Any, field: str, default: Any = None) -> Any:
        if isinstance(obj, dict):
            return obj.get(field, default)
        else:
            return getattr(obj, field, default)

    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    from ._shared import parse_cmdlet_args
    parsed = parse_cmdlet_args(args, CMDLET)
    override_hash = parsed.get("hash")

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        payload = client.fetch_file_metadata(hashes=[hash_hex], include_service_keys_to_tags=False, include_notes=True)
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}")
        return 1
    items = payload.get("metadata") if isinstance(payload, dict) else None
    meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
    notes = {}
    if isinstance(meta, dict):
        # Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
        notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
    if notes:
        ctx.emit("Notes:")
        # Print flattened: service -> (name: text)
        if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
            for svc, mapping in notes.items():
                ctx.emit(f"- {svc}:")
                if isinstance(mapping, dict):
                    for k, v in mapping.items():
                        ctx.emit(f" • {k}: {str(v).strip()}")
        elif isinstance(notes, dict):
            for k, v in notes.items():
                ctx.emit(f"- {k}: {str(v).strip()}")
    else:
        ctx.emit("No notes found.")
    return 0
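The note-printing logic above accepts two shapes for the notes mapping. Both are sketched below with made-up note names and text; which form a given Hydrus version actually returns is an assumption the code hedges on by checking for nested dicts first:

    # nested form: service -> {note name: text}
    notes_nested = {"my notes": {"source": "ripped from upload", "todo": "trim intro"}}
    # flat form: note name -> text
    notes_flat = {"source": "ripped from upload"}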
240 cmdlets/get_relationship.py Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence, List, Optional
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper import hydrus as hydrus_wrapper
|
||||||
|
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="get-relationship",
|
||||||
|
summary="Print Hydrus relationships for the selected file.",
|
||||||
|
usage="get-relationship [-hash <sha256>]",
|
||||||
|
args=[
|
||||||
|
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Lists relationship data as returned by Hydrus.",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases
|
||||||
|
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
# Help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse -hash override
|
||||||
|
override_hash: str | None = None
|
||||||
|
args_list = list(_args)
|
||||||
|
i = 0
|
||||||
|
while i < len(args_list):
|
||||||
|
a = args_list[i]
|
||||||
|
low = str(a).lower()
|
||||||
|
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
|
||||||
|
override_hash = str(args_list[i + 1]).strip()
|
||||||
|
break
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Handle @N selection which creates a list - extract the first item
|
||||||
|
if isinstance(result, list) and len(result) > 0:
|
||||||
|
result = result[0]
|
||||||
|
|
||||||
|
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||||
|
if not hash_hex:
|
||||||
|
log("Selected result does not include a Hydrus hash", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
client = hydrus_wrapper.get_client(config)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if client is None:
|
||||||
|
log("Hydrus client unavailable", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
rel = client.get_file_relationships(hash_hex)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
if not rel:
|
||||||
|
log("No relationships found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Extract file_relationships from response
|
||||||
|
file_rels = rel.get("file_relationships", {})
|
||||||
|
if not file_rels:
|
||||||
|
log("No relationships found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Get the relationships dict for this specific hash
|
||||||
|
this_file_rels = file_rels.get(hash_hex)
|
||||||
|
if not this_file_rels:
|
||||||
|
log("No relationships found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Extract related hashes from all relationship types
|
||||||
|
# Keys "0", "1", "3", "8" are relationship type IDs
|
||||||
|
# Values are lists of hashes
|
||||||
|
related_hashes = []
|
||||||
|
for rel_type_id, hash_list in this_file_rels.items():
|
||||||
|
# Skip non-numeric keys and metadata keys
|
||||||
|
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
|
||||||
|
continue
|
||||||
|
if isinstance(hash_list, list):
|
||||||
|
for rel_hash in hash_list:
|
||||||
|
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
|
||||||
|
related_hashes.append(rel_hash)
|
||||||
|
|
||||||
|
# Remove duplicates while preserving order
|
||||||
|
seen = set()
|
||||||
|
unique_hashes = []
|
||||||
|
for h in related_hashes:
|
||||||
|
if h not in seen:
|
||||||
|
seen.add(h)
|
||||||
|
unique_hashes.append(h)
|
||||||
|
|
||||||
|
if not unique_hashes:
|
||||||
|
log("No related files found.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Fetch metadata for all related files
|
||||||
|
try:
|
||||||
|
metadata_payload = client.fetch_file_metadata(
|
||||||
|
hashes=unique_hashes,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
metadata_list = metadata_payload.get("metadata") if isinstance(metadata_payload, dict) else None
|
||||||
|
if not isinstance(metadata_list, list):
|
||||||
|
log("Hydrus metadata response was not a list", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Build metadata map by hash
|
||||||
|
meta_by_hash: Dict[str, Dict[str, Any]] = {}
|
||||||
|
for item in metadata_list:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
item_hash = normalize_hash(item.get("hash"))
|
||||||
|
if item_hash:
|
||||||
|
meta_by_hash[item_hash] = item
|
||||||
|
|
||||||
|
# Helper functions for formatting
|
||||||
|
def _format_duration(seconds: Optional[float]) -> str:
|
||||||
|
if seconds is None:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
s = int(seconds)
|
||||||
|
hours = s // 3600
|
||||||
|
minutes = (s % 3600) // 60
|
||||||
|
secs = s % 60
|
||||||
|
if hours > 0:
|
||||||
|
return f"{hours}:{minutes:02d}:{secs:02d}"
|
||||||
|
else:
|
||||||
|
return f"{minutes}:{secs:02d}"
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _get_title(meta: Dict[str, Any]) -> str:
|
||||||
|
# Try to extract title from tags
|
||||||
|
tags_payload = meta.get("tags")
|
||||||
|
if isinstance(tags_payload, dict):
|
||||||
|
for service_data in tags_payload.values():
|
||||||
|
if isinstance(service_data, dict):
|
||||||
|
storage_tags = service_data.get("storage_tags")
|
||||||
|
if isinstance(storage_tags, dict):
|
||||||
|
for tag_list in storage_tags.values():
|
||||||
|
if isinstance(tag_list, list):
|
||||||
|
for tag in tag_list:
|
||||||
|
tag_str = str(tag).lower()
|
||||||
|
if tag_str.startswith("title:"):
|
||||||
|
return str(tag)[6:].strip()
|
||||||
|
# Fallback to hash prefix
|
||||||
|
h = meta.get("hash")
|
||||||
|
return str(h)[:12] if h else "unknown"
|
||||||
|
|
||||||
|
def _get_mime_type(meta: Dict[str, Any]) -> str:
|
||||||
|
mime = meta.get("mime", "")
|
||||||
|
if not mime:
|
||||||
|
return ""
|
||||||
|
# Extract type from mime (e.g., "video/mp4" -> "video")
|
||||||
|
parts = str(mime).split("/")
|
||||||
|
return parts[0] if parts else ""
|
||||||
|
|
||||||
|
# Print header and separator
|
||||||
|
log("# | Title | Type | Duration | Size")
|
||||||
|
log("--+---------------------------+-------+----------+--------")
|
||||||
|
|
||||||
|
# Create result objects for each related file
|
||||||
|
results: List[Any] = []
|
||||||
|
|
||||||
|
# Print each related file
|
||||||
|
for idx, rel_hash in enumerate(unique_hashes, start=1):
|
||||||
|
meta = meta_by_hash.get(rel_hash)
|
||||||
|
if not meta:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = _get_title(meta)
|
||||||
|
mime_type = _get_mime_type(meta)
|
||||||
|
|
||||||
|
# Get duration
|
||||||
|
duration_value = meta.get("duration")
|
||||||
|
if duration_value is None and isinstance(meta.get("metadata"), dict):
|
||||||
|
duration_value = meta["metadata"].get("duration")
|
||||||
|
duration_str = _format_duration(duration_value)
|
||||||
|
|
||||||
|
# Get size
|
||||||
|
size = meta.get("size") or meta.get("file_size")
|
||||||
|
size_str = fmt_bytes(size) if size else ""
|
||||||
|
|
||||||
|
# Format and print row
|
||||||
|
title_display = title[:25].ljust(25)
|
||||||
|
type_display = mime_type[:5].ljust(5)
|
||||||
|
duration_display = duration_str[:8].ljust(8)
|
||||||
|
size_display = size_str[:7].ljust(7)
|
||||||
|
|
||||||
|
log(f"{idx:2d} | {title_display} | {type_display} | {duration_display} | {size_display}")
|
||||||
|
|
||||||
|
# Create result object for pipeline
|
||||||
|
result_obj = type("RelatedFile", (), {
|
||||||
|
"hash_hex": rel_hash,
|
||||||
|
"title": title,
|
||||||
|
"media_kind": mime_type or "other",
|
||||||
|
"size": size,
|
||||||
|
"duration": duration_value,
|
||||||
|
"known_urls": [],
|
||||||
|
"annotations": [],
|
||||||
|
"columns": [
|
||||||
|
("Title", title),
|
||||||
|
("Type", mime_type),
|
||||||
|
("Duration", duration_str),
|
||||||
|
("Size", size_str),
|
||||||
|
],
|
||||||
|
})()
|
||||||
|
results.append(result_obj)
|
||||||
|
|
||||||
|
# Emit results to pipeline
|
||||||
|
try:
|
||||||
|
ctx._PIPE_EMITS.extend(results)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
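For context, a sketch of the get_file_relationships() response shape that the extraction loop above walks: a file_relationships mapping keyed by the queried hash, whose value mixes king metadata flags with numeric relationship-type keys ("0", "1", "3", "8") holding lists of related hashes. The hash values below are placeholders and the shape is inferred from this code, not verified against the Hydrus API:

    # assumed response shape; hash values are placeholders
    rel = {
        "file_relationships": {
            "aaaa...": {
                "is_king": True,
                "king": "aaaa...",
                "0": ["bbbb..."],
                "3": ["cccc...", "dddd..."],
            }
        }
    }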
1191 cmdlets/get_tag.py Normal file (diff suppressed because it is too large)
90 cmdlets/get_url.py Normal file
@@ -0,0 +1,90 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="get-url",
    summary="List URLs associated with a Hydrus file.",
    usage="get-url [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Prints the known URLs for the selected Hydrus file.",
    ],
)


def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    return override_hash, rest


@register(["get-url", "get-urls", "get_url"])  # aliases
def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Helper to get field from both dict and object
    def get_field(obj: Any, field: str, default: Any = None) -> Any:
        if isinstance(obj, dict):
            return obj.get(field, default)
        else:
            return getattr(obj, field, default)

    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    override_hash, _ = _parse_hash_and_rest(args)
    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True)
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}")
        return 1
    items = payload.get("metadata") if isinstance(payload, dict) else None
    meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
    urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or []
    if urls:
        ctx.emit("URLs:")
        for u in urls:
            text = str(u).strip()
            if text:
                ctx.emit(f"- {text}")
    else:
        ctx.emit("No URLs found.")
    return 0
138 cmdlets/manage_config.py Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
from typing import List, Dict, Any
|
||||||
|
from ._shared import Cmdlet, CmdletArg
|
||||||
|
from config import load_config, save_config
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name=".config",
|
||||||
|
summary="Manage configuration settings",
|
||||||
|
usage=".config [key] [value]",
|
||||||
|
args=[
|
||||||
|
CmdletArg(
|
||||||
|
name="key",
|
||||||
|
description="Configuration key to update (dot-separated)",
|
||||||
|
required=False
|
||||||
|
),
|
||||||
|
CmdletArg(
|
||||||
|
name="value",
|
||||||
|
description="New value for the configuration key",
|
||||||
|
required=False
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def flatten_config(config: Dict[str, Any], parent_key: str = '', sep: str = '.') -> List[Dict[str, Any]]:
|
||||||
|
items = []
|
||||||
|
for k, v in config.items():
|
||||||
|
if k.startswith('_'): # Skip internal keys
|
||||||
|
continue
|
||||||
|
|
||||||
|
new_key = f"{parent_key}{sep}{k}" if parent_key else k
|
||||||
|
if isinstance(v, dict):
|
||||||
|
items.extend(flatten_config(v, new_key, sep=sep))
|
||||||
|
else:
|
||||||
|
items.append({
|
||||||
|
"Key": new_key,
|
||||||
|
"Value": str(v),
|
||||||
|
"Type": type(v).__name__,
|
||||||
|
"_selection_args": [new_key]
|
||||||
|
})
|
||||||
|
return items
|
||||||
|


def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
    keys = key.split('.')
    d = config

    # Navigate to the parent dict
    for k in keys[:-1]:
        if k not in d or not isinstance(d[k], dict):
            d[k] = {}
        d = d[k]

    last_key = keys[-1]

    # Try to preserve type if key exists
    if last_key in d:
        current_val = d[last_key]
        if isinstance(current_val, bool):
            if value.lower() in ('true', 'yes', '1', 'on'):
                d[last_key] = True
            elif value.lower() in ('false', 'no', '0', 'off'):
                d[last_key] = False
            else:
                # For config values, explicit input is better than Python truthiness,
                # so fall back to storing the raw string with a warning.
                print(f"Warning: Could not convert '{value}' to boolean. Using string.")
                d[last_key] = value
        elif isinstance(current_val, int):
            try:
                d[last_key] = int(value)
            except ValueError:
                print(f"Warning: Could not convert '{value}' to int. Using string.")
                d[last_key] = value
        elif isinstance(current_val, float):
            try:
                d[last_key] = float(value)
            except ValueError:
                print(f"Warning: Could not convert '{value}' to float. Using string.")
                d[last_key] = value
        else:
            d[last_key] = value
    else:
        # New key, try to infer type
        if value.lower() in ('true', 'false'):
            d[last_key] = (value.lower() == 'true')
        elif value.isdigit():
            d[last_key] = int(value)
        else:
            d[last_key] = value

    return True
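
# Type-preservation sketch: if the config already holds {"debug": False}, then
# set_nested_config(config, "debug", "yes") stores True (bool is checked before int,
# since bool is a subclass of int in Python); a brand-new key like "retries" with
# value "3" is inferred as int via str.isdigit(). Key names here are illustrative.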


def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
    # Reload the config from disk instead of using the passed 'config':
    # the runtime dict may contain non-serializable objects (like the worker manager),
    # while load_config() returns a fresh dict that is safe to edit and save.
    current_config = load_config()

    # Args are handled manually because values may contain spaces and the
    # @ expansion logic in CLI.py passes them through as separate tokens.
    if not args:
        # List mode
        items = flatten_config(current_config)
        # Sort by key
        items.sort(key=lambda x: x['Key'])

        # Emit items for ResultTable
        import pipeline as ctx
        for item in items:
            ctx.emit(item)
        return 0

    # Update mode
    key = args[0]

    if len(args) < 2:
        print(f"Error: Value required for key '{key}'")
        return 1

    value = " ".join(args[1:])

    # Remove quotes if present
    if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
        value = value[1:-1]

    try:
        set_nested_config(current_config, key, value)
        save_config(current_config)
        print(f"Updated '{key}' to '{value}'")
        return 0
    except Exception as e:
        print(f"Error updating config: {e}")
        return 1


CMDLET.exec = _run
916
cmdlets/merge_file.py
Normal file
@@ -0,0 +1,916 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import json
import sys

from helper.logger import log
from helper.download import download_media
from models import DownloadOptions
from config import resolve_output_dir
import subprocess as _subprocess
import shutil as _shutil
from ._shared import parse_cmdlet_args

try:
    from PyPDF2 import PdfWriter, PdfReader
    HAS_PYPDF2 = True
except ImportError:
    HAS_PYPDF2 = False
    PdfWriter = None
    PdfReader = None

try:
    from metadata import (
        read_tags_from_file,
        write_tags_to_file,
        dedup_tags_by_namespace,
        merge_multiple_tag_lists,
        write_tags,
        write_metadata
    )
    HAS_METADATA_API = True
except ImportError:
    HAS_METADATA_API = False

from . import register
from ._shared import (
    Cmdlet,
    CmdletArg,
    normalize_result_input,
    get_pipe_object_path,
    get_pipe_object_hash,
)
import models
import pipeline as ctx


def _get_item_value(item: Any, key: str, default: Any = None) -> Any:
    """Helper to read either dict keys or attributes."""
    if isinstance(item, dict):
        return item.get(key, default)
    return getattr(item, key, default)

def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Merge multiple files into one."""
|
||||||
|
|
||||||
|
# Parse help
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
delete_after = parsed.get("delete", False)
|
||||||
|
|
||||||
|
output_override: Optional[Path] = None
|
||||||
|
output_arg = parsed.get("output")
|
||||||
|
if output_arg:
|
||||||
|
try:
|
||||||
|
output_override = Path(str(output_arg)).expanduser()
|
||||||
|
except Exception:
|
||||||
|
output_override = None
|
||||||
|
|
||||||
|
format_spec = parsed.get("format")
|
||||||
|
if format_spec:
|
||||||
|
format_spec = str(format_spec).lower().strip()
|
||||||
|
|
||||||
|
# Collect files from piped results
|
||||||
|
# Use normalize_result_input to handle both single items and lists
|
||||||
|
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
|
||||||
|
|
||||||
|
if not files_to_merge:
|
||||||
|
log("No files provided to merge", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if len(files_to_merge) < 2:
|
||||||
|
# Only 1 file - pass it through unchanged
|
||||||
|
# (merge only happens when multiple files are collected)
|
||||||
|
item = files_to_merge[0]
|
||||||
|
ctx.emit(item)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Extract file paths and metadata from result objects
|
||||||
|
source_files: List[Path] = []
|
||||||
|
source_tags_files: List[Path] = []
|
||||||
|
source_hashes: List[str] = []
|
||||||
|
source_urls: List[str] = []
|
||||||
|
source_tags: List[str] = [] # NEW: collect tags from source files
|
||||||
|
source_relationships: List[str] = [] # NEW: collect relationships from source files
|
||||||
|
|
||||||
|
for item in files_to_merge:
|
||||||
|
raw_path = get_pipe_object_path(item)
|
||||||
|
target_path = None
|
||||||
|
if isinstance(raw_path, Path):
|
||||||
|
target_path = raw_path
|
||||||
|
elif isinstance(raw_path, str) and raw_path.strip():
|
||||||
|
candidate = Path(raw_path).expanduser()
|
||||||
|
if candidate.exists():
|
||||||
|
target_path = candidate
|
||||||
|
|
||||||
|
# Check for playlist item that needs downloading
|
||||||
|
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
|
||||||
|
try:
|
||||||
|
playlist_url = item.get('__file_path')
|
||||||
|
item_idx = int(item['__action'].split(':')[1])
|
||||||
|
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
|
||||||
|
|
||||||
|
output_dir = resolve_output_dir(config)
|
||||||
|
opts = DownloadOptions(
|
||||||
|
url=playlist_url,
|
||||||
|
output_dir=output_dir,
|
||||||
|
playlist_items=str(item_idx),
|
||||||
|
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
|
||||||
|
)
|
||||||
|
|
||||||
|
res = download_media(opts)
|
||||||
|
if res and res.path and res.path.exists():
|
||||||
|
target_path = res.path
|
||||||
|
log(f"✓ Downloaded: {target_path.name}", flush=True)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Failed to download playlist item: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
if target_path and target_path.exists():
|
||||||
|
source_files.append(target_path)
|
||||||
|
|
||||||
|
# Track the .tags file for this source
|
||||||
|
tags_file = target_path.with_suffix(target_path.suffix + '.tags')
|
||||||
|
if tags_file.exists():
|
||||||
|
source_tags_files.append(tags_file)
|
||||||
|
|
||||||
|
# Try to read hash, tags, urls, and relationships from .tags sidecar file
|
||||||
|
try:
|
||||||
|
tags_content = tags_file.read_text(encoding='utf-8')
|
||||||
|
for line in tags_content.split('\n'):
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
if line.startswith('hash:'):
|
||||||
|
hash_value = line[5:].strip()
|
||||||
|
if hash_value:
|
||||||
|
source_hashes.append(hash_value)
|
||||||
|
elif line.startswith('known_url:') or line.startswith('url:'):
|
||||||
|
# Extract URLs from tags file
|
||||||
|
url_value = line.split(':', 1)[1].strip() if ':' in line else ''
|
||||||
|
if url_value and url_value not in source_urls:
|
||||||
|
source_urls.append(url_value)
|
||||||
|
elif line.startswith('relationship:'):
|
||||||
|
# Extract relationships from tags file
|
||||||
|
rel_value = line.split(':', 1)[1].strip() if ':' in line else ''
|
||||||
|
if rel_value and rel_value not in source_relationships:
|
||||||
|
source_relationships.append(rel_value)
|
||||||
|
else:
|
||||||
|
# Collect actual tags (not metadata like hash: or known_url:)
|
||||||
|
source_tags.append(line)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Extract hash if available in item (as fallback)
|
||||||
|
hash_value = get_pipe_object_hash(item)
|
||||||
|
if hash_value and hash_value not in source_hashes:
|
||||||
|
source_hashes.append(str(hash_value))
|
||||||
|
|
||||||
|
# Extract known URLs if available
|
||||||
|
known_urls = _get_item_value(item, 'known_urls', [])
|
||||||
|
if isinstance(known_urls, str):
|
||||||
|
source_urls.append(known_urls)
|
||||||
|
elif isinstance(known_urls, list):
|
||||||
|
source_urls.extend(known_urls)
|
||||||
|
else:
|
||||||
|
title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown')
|
||||||
|
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
|
||||||
|
|
||||||
|
if len(source_files) < 2:
|
||||||
|
log("At least 2 valid files required to merge", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Detect file types
|
||||||
|
file_types = set()
|
||||||
|
for f in source_files:
|
||||||
|
suffix = f.suffix.lower()
|
||||||
|
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
|
||||||
|
file_types.add('audio')
|
||||||
|
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||||
|
file_types.add('video')
|
||||||
|
elif suffix in {'.pdf'}:
|
||||||
|
file_types.add('pdf')
|
||||||
|
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
|
||||||
|
file_types.add('text')
|
||||||
|
else:
|
||||||
|
file_types.add('other')
|
||||||
|
|
||||||
|
if len(file_types) > 1 and 'other' not in file_types:
|
||||||
|
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
|
||||||
|
log("Can only merge files of the same type", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
file_kind = list(file_types)[0] if file_types else 'other'
|
||||||
|
|
||||||
|
# Determine output format
|
||||||
|
output_format = format_spec or 'auto'
|
||||||
|
if output_format == 'auto':
|
||||||
|
if file_kind == 'audio':
|
||||||
|
output_format = 'mka' # Default audio codec - mka supports chapters and stream copy
|
||||||
|
elif file_kind == 'video':
|
||||||
|
output_format = 'mp4' # Default video codec
|
||||||
|
elif file_kind == 'pdf':
|
||||||
|
output_format = 'pdf'
|
||||||
|
else:
|
||||||
|
output_format = 'txt'
|
||||||
|
|
||||||
|
# Determine output path
|
||||||
|
if output_override:
|
||||||
|
if output_override.is_dir():
|
||||||
|
base_name = _sanitize_name(getattr(files_to_merge[0], 'title', 'merged'))
|
||||||
|
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
|
||||||
|
else:
|
||||||
|
output_path = output_override
|
||||||
|
else:
|
||||||
|
first_file = source_files[0]
|
||||||
|
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
|
||||||
|
|
||||||
|
# Ensure output directory exists
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Perform merge based on file type
|
||||||
|
if file_kind == 'audio':
|
||||||
|
success = _merge_audio(source_files, output_path, output_format)
|
||||||
|
elif file_kind == 'video':
|
||||||
|
success = _merge_video(source_files, output_path, output_format)
|
||||||
|
elif file_kind == 'pdf':
|
||||||
|
success = _merge_pdf(source_files, output_path)
|
||||||
|
elif file_kind == 'text':
|
||||||
|
success = _merge_text(source_files, output_path)
|
||||||
|
else:
|
||||||
|
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
log("Merge failed", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Create .tags sidecar file for the merged output using unified API
|
||||||
|
tags_path = output_path.with_suffix(output_path.suffix + '.tags')
|
||||||
|
try:
|
||||||
|
# Start with title tag
|
||||||
|
merged_tags = [f"title:{output_path.stem}"]
|
||||||
|
|
||||||
|
# Merge tags from source files using metadata API
|
||||||
|
if source_tags and HAS_METADATA_API:
|
||||||
|
# Use dedup function to normalize and deduplicate
|
||||||
|
merged_source_tags = dedup_tags_by_namespace(source_tags)
|
||||||
|
merged_tags.extend(merged_source_tags)
|
||||||
|
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
|
||||||
|
elif source_tags:
|
||||||
|
# Fallback: simple deduplication if metadata API unavailable
|
||||||
|
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
|
||||||
|
|
||||||
|
# Write merged tags to sidecar file
|
||||||
|
if HAS_METADATA_API and write_tags_to_file:
|
||||||
|
# Use unified API for file writing
|
||||||
|
source_hashes_list = source_hashes if source_hashes else None
|
||||||
|
source_urls_list = source_urls if source_urls else None
|
||||||
|
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list)
|
||||||
|
else:
|
||||||
|
# Fallback: manual file writing
|
||||||
|
tags_lines = []
|
||||||
|
|
||||||
|
# Add hash first (if available)
|
||||||
|
if source_hashes:
|
||||||
|
tags_lines.append(f"hash:{source_hashes[0]}")
|
||||||
|
|
||||||
|
# Add regular tags
|
||||||
|
tags_lines.extend(merged_tags)
|
||||||
|
|
||||||
|
# Add known URLs
|
||||||
|
if source_urls:
|
||||||
|
for url in source_urls:
|
||||||
|
tags_lines.append(f"known_url:{url}")
|
||||||
|
|
||||||
|
# Add relationships (if available)
|
||||||
|
if source_relationships:
|
||||||
|
for rel in source_relationships:
|
||||||
|
tags_lines.append(f"relationship:{rel}")
|
||||||
|
|
||||||
|
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||||
|
f.write('\n'.join(tags_lines) + '\n')
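# Resulting sidecar sketch (values illustrative):
#   hash:<sha256-of-first-source>
#   title:My Audiobook (merged)
#   known_url:https://example.com/source
#   relationship:<hash-of-related-file>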
|
||||||
|
|
||||||
|
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Also create .metadata file using centralized function
|
||||||
|
try:
|
||||||
|
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships)
|
||||||
|
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Emit PipelineItem so the merged file can be piped to next command
|
||||||
|
try:
|
||||||
|
# Try to import PipelineItem from downlow module
|
||||||
|
try:
|
||||||
|
from downlow import PipelineItem
|
||||||
|
except ImportError:
|
||||||
|
# Fallback: create a simple object with the required attributes
|
||||||
|
class SimpleItem:
|
||||||
|
def __init__(self, target, title, media_kind, tags=None, known_urls=None):
|
||||||
|
self.target = target
|
||||||
|
self.title = title
|
||||||
|
self.media_kind = media_kind
|
||||||
|
self.tags = tags or []
|
||||||
|
self.known_urls = known_urls or []
|
||||||
|
PipelineItem = SimpleItem
|
||||||
|
|
||||||
|
merged_item = PipelineItem(
|
||||||
|
target=str(output_path),
|
||||||
|
title=output_path.stem,
|
||||||
|
media_kind=file_kind,
|
||||||
|
tags=merged_tags, # Include merged tags
|
||||||
|
known_urls=source_urls # Include known URLs
|
||||||
|
)
|
||||||
|
ctx.emit(merged_item)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
|
||||||
|
# Still emit a string representation for feedback
|
||||||
|
ctx.emit(f"Merged: {output_path}")
|
||||||
|
|
||||||
|
# Delete source files if requested
|
||||||
|
if delete_after:
|
||||||
|
# First delete all .tags files
|
||||||
|
for tags_file in source_tags_files:
|
||||||
|
try:
|
||||||
|
tags_file.unlink()
|
||||||
|
log(f"Deleted: {tags_file.name}", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Then delete all source files
|
||||||
|
for f in source_files:
|
||||||
|
try:
|
||||||
|
f.unlink()
|
||||||
|
log(f"Deleted: {f.name}", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_name(text: str) -> str:
|
||||||
|
"""Sanitize filename."""
|
||||||
|
allowed = []
|
||||||
|
for ch in text:
|
||||||
|
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
|
||||||
|
return (" ".join("".join(allowed).split()) or "merged").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _ext_for_format(fmt: str) -> str:
|
||||||
|
"""Get file extension for format."""
|
||||||
|
format_map = {
|
||||||
|
'mp3': 'mp3',
|
||||||
|
'm4a': 'm4a',
|
||||||
|
'aac': 'aac',
|
||||||
|
'opus': 'opus',
|
||||||
|
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
|
||||||
|
'mkv': 'mkv',
|
||||||
|
'mp4': 'mp4',
|
||||||
|
'webm': 'webm',
|
||||||
|
'pdf': 'pdf',
|
||||||
|
'txt': 'txt',
|
||||||
|
'auto': 'mka', # Default - MKA for chapters
|
||||||
|
}
|
||||||
|
return format_map.get(fmt.lower(), 'mka')
|
||||||
|
|
||||||
|
|
||||||
|
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
|
||||||
|
"""Add chapters to an M4A file using mutagen.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to M4A file
|
||||||
|
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if successful, False otherwise
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
if not chapters:
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
from mutagen.mp4 import MP4
|
||||||
|
except ImportError:
|
||||||
|
logger.warning("[merge-file] mutagen not available for chapter writing")
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Load the MP4 file
|
||||||
|
audio = MP4(str(file_path))
|
||||||
|
|
||||||
|
# Build the chapter atom
|
||||||
|
# MP4 chapters are stored in a 'chap' atom with specific structure
|
||||||
|
chapter_data = b''
|
||||||
|
|
||||||
|
for i, chapter in enumerate(chapters, 1):
|
||||||
|
# Each chapter entry: 10-byte header + title
|
||||||
|
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
|
||||||
|
start_time_ms = int(chapter.get('start_ms', 0))
|
||||||
|
|
||||||
|
# Chapter atom format for M4A:
|
||||||
|
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
|
||||||
|
# This is complex, so we'll use a simpler atom approach
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
|
||||||
|
# Chapter writing requires low-level atom manipulation
|
||||||
|
# For now, we'll just return and note this limitation
|
||||||
|
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[merge-file] Error writing chapters: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||||
|
"""Merge audio files with chapters based on file boundaries."""
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
ffmpeg_path = _shutil.which('ffmpeg')
|
||||||
|
if not ffmpeg_path:
|
||||||
|
log("ffmpeg not found in PATH", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Get duration of each file to calculate chapter timestamps
|
||||||
|
chapters = []
|
||||||
|
current_time_ms = 0
|
||||||
|
|
||||||
|
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Analyzing files for chapters")
|
||||||
|
|
||||||
|
for file_path in files:
|
||||||
|
# Get duration using ffprobe
|
||||||
|
try:
|
||||||
|
ffprobe_cmd = [
|
||||||
|
'ffprobe', '-v', 'error', '-show_entries',
|
||||||
|
'format=duration', '-print_format',
|
||||||
|
'default=noprint_wrappers=1:nokey=1', str(file_path)
|
||||||
|
]
|
||||||
|
|
||||||
|
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
|
||||||
|
if probe_result.returncode == 0 and probe_result.stdout.strip():
|
||||||
|
try:
|
||||||
|
duration_sec = float(probe_result.stdout.strip())
|
||||||
|
except ValueError:
|
||||||
|
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
|
||||||
|
duration_sec = 0
|
||||||
|
else:
|
||||||
|
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
|
||||||
|
duration_sec = 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
|
||||||
|
duration_sec = 0
|
||||||
|
|
||||||
|
# Create chapter entry - use title: tag from metadata if available
|
||||||
|
title = file_path.stem # Default to filename without extension
|
||||||
|
if HAS_METADATA_API:
|
||||||
|
try:
|
||||||
|
# Try to read tags from .tags sidecar file
|
||||||
|
tags_file = file_path.with_suffix(file_path.suffix + '.tags')
|
||||||
|
if tags_file.exists():
|
||||||
|
tags = read_tags_from_file(tags_file)
|
||||||
|
if tags:
|
||||||
|
# Look for title: tag
|
||||||
|
for tag in tags:
|
||||||
|
if isinstance(tag, str) and tag.lower().startswith('title:'):
|
||||||
|
# Extract the title value after the colon
|
||||||
|
title = tag.split(':', 1)[1].strip()
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
|
||||||
|
pass # Fall back to filename
|
||||||
|
|
||||||
|
# Convert seconds to HH:MM:SS.mmm format
|
||||||
|
hours = int(current_time_ms // 3600000)
|
||||||
|
minutes = int((current_time_ms % 3600000) // 60000)
|
||||||
|
seconds = int((current_time_ms % 60000) // 1000)
|
||||||
|
millis = int(current_time_ms % 1000)
|
||||||
|
|
||||||
|
chapters.append({
|
||||||
|
'time_ms': current_time_ms,
|
||||||
|
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
|
||||||
|
'title': title,
|
||||||
|
'duration_sec': duration_sec
|
||||||
|
})
|
||||||
|
|
||||||
|
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
|
||||||
|
current_time_ms += int(duration_sec * 1000)
|
||||||
|
|
||||||
|
# Step 2: Create concat demuxer file
|
||||||
|
concat_file = output.parent / f".concat_{output.stem}.txt"
|
||||||
|
concat_lines = []
|
||||||
|
for f in files:
|
||||||
|
# Escape quotes in path
|
||||||
|
safe_path = str(f).replace("'", "'\\''")
|
||||||
|
concat_lines.append(f"file '{safe_path}'")
|
||||||
|
|
||||||
|
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
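# At this point the concat list looks like (paths illustrative):
#   file '/audio/chapter 01.mp3'
#   file '/audio/chapter 02.mp3'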
|
||||||
|
|
||||||
|
# Step 3: Create FFmpeg metadata file with chapters
|
||||||
|
metadata_file = output.parent / f".metadata_{output.stem}.txt"
|
||||||
|
metadata_lines = [';FFMETADATA1']
|
||||||
|
|
||||||
|
for i, chapter in enumerate(chapters):
|
||||||
|
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
|
||||||
|
metadata_lines.append('[CHAPTER]')
|
||||||
|
metadata_lines.append('TIMEBASE=1/1000')
|
||||||
|
metadata_lines.append(f'START={chapter["time_ms"]}')
|
||||||
|
# Calculate end time (start of next chapter or end of file)
|
||||||
|
if i < len(chapters) - 1:
|
||||||
|
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
|
||||||
|
else:
|
||||||
|
metadata_lines.append(f'END={current_time_ms}')
|
||||||
|
metadata_lines.append(f'title={chapter["title"]}')
|
||||||
|
|
||||||
|
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
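# The generated FFMetadata file has this shape (times in milliseconds, values illustrative):
#   ;FFMETADATA1
#   [CHAPTER]
#   TIMEBASE=1/1000
#   START=0
#   END=1805000
#   title=Chapter 01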
|
||||||
|
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Created {len(chapters)} chapters")
|
||||||
|
|
||||||
|
# Step 4: Build FFmpeg command to merge and embed chapters
|
||||||
|
# Strategy: First merge audio, then add metadata in separate pass
|
||||||
|
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
|
||||||
|
|
||||||
|
# Add threading options for speed
|
||||||
|
cmd.extend(['-threads', '0']) # Use all available threads
|
||||||
|
|
||||||
|
# Audio codec selection for first input
|
||||||
|
if output_format == 'mp3':
|
||||||
|
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
|
||||||
|
elif output_format == 'm4a':
|
||||||
|
# Use copy if possible (much faster), otherwise re-encode
|
||||||
|
# Check if inputs are already AAC/M4A to avoid re-encoding
|
||||||
|
# For now, default to copy if format matches, otherwise re-encode
|
||||||
|
# But since we are merging potentially different codecs, re-encoding is safer
|
||||||
|
# To speed up re-encoding, we can use a faster preset or hardware accel if available
|
||||||
|
cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
|
||||||
|
elif output_format == 'aac':
|
||||||
|
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
|
||||||
|
elif output_format == 'opus':
|
||||||
|
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
|
||||||
|
elif output_format == 'mka':
|
||||||
|
# FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
|
||||||
|
# If we want speed, copy is best. If we want compatibility, re-encode.
|
||||||
|
# Let's try copy first if inputs are same format, but that's hard to detect here.
|
||||||
|
# Defaulting to copy for MKA as it's a container that supports many codecs
|
||||||
|
cmd.extend(['-c:a', 'copy'])
|
||||||
|
else:
|
||||||
|
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
|
||||||
|
|
||||||
|
# Add the output file
|
||||||
|
cmd.append(str(output))
|
||||||
|
|
||||||
|
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
|
||||||
|
|
||||||
|
# Run ffmpeg with progress monitoring
|
||||||
|
try:
|
||||||
|
from helper.progress import print_progress, print_final_progress
|
||||||
|
import re
|
||||||
|
|
||||||
|
process = _subprocess.Popen(
|
||||||
|
cmd,
|
||||||
|
stdout=_subprocess.PIPE,
|
||||||
|
stderr=_subprocess.PIPE,
|
||||||
|
text=True,
|
||||||
|
encoding='utf-8',
|
||||||
|
errors='replace'
|
||||||
|
)
|
||||||
|
|
||||||
|
# Monitor progress
|
||||||
|
duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
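# Sketch of the ffmpeg stderr progress line this regex is meant to match:
#   "size=    2048kB time=00:03:25.46 bitrate=  81.7kbits/s speed=30x"
# the four captured groups (00, 03, 25, 46) are combined into seconds below.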
|
||||||
|
total_duration_sec = current_time_ms / 1000.0
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# Read stderr line by line (ffmpeg writes progress to stderr)
|
||||||
|
if process.stderr:
|
||||||
|
line = process.stderr.readline()
|
||||||
|
if not line and process.poll() is not None:
|
||||||
|
break
|
||||||
|
|
||||||
|
if line:
|
||||||
|
# Parse time=HH:MM:SS.mm
|
||||||
|
match = duration_re.search(line)
|
||||||
|
if match and total_duration_sec > 0:
|
||||||
|
h, m, s, cs = map(int, match.groups())
|
||||||
|
current_sec = h * 3600 + m * 60 + s + cs / 100.0
|
||||||
|
|
||||||
|
# Calculate speed/bitrate if available (optional)
|
||||||
|
# For now just show percentage
|
||||||
|
print_progress(
|
||||||
|
output.name,
|
||||||
|
int(current_sec * 1000), # Use ms as "bytes" for progress bar
|
||||||
|
int(total_duration_sec * 1000),
|
||||||
|
speed=0
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Wait for completion
|
||||||
|
stdout, stderr = process.communicate()
|
||||||
|
|
||||||
|
if process.returncode != 0:
|
||||||
|
log(f"FFmpeg error: {stderr}", file=sys.stderr)
|
||||||
|
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
|
||||||
|
|
||||||
|
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"[merge-file] ffmpeg process error: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
|
||||||
|
|
||||||
|
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
|
||||||
|
if output_format == 'mka' or output.suffix.lower() == '.mka':
|
||||||
|
# MKA/MKV format has native chapter support via FFMetadata
|
||||||
|
# Re-mux the file with chapters embedded (copy streams, no re-encode)
|
||||||
|
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
|
||||||
|
|
||||||
|
temp_output = output.parent / f".temp_{output.stem}.mka"
|
||||||
|
|
||||||
|
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
|
||||||
|
mkvmerge_path = _shutil.which('mkvmerge')
|
||||||
|
|
||||||
|
if mkvmerge_path:
|
||||||
|
# mkvmerge is the best tool for embedding chapters in Matroska files
|
||||||
|
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
|
||||||
|
cmd2 = [
|
||||||
|
mkvmerge_path, '-o', str(temp_output),
|
||||||
|
'--chapters', str(metadata_file),
|
||||||
|
str(output)
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
# Fallback to ffmpeg with proper chapter embedding for Matroska
|
||||||
|
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
|
||||||
|
# For Matroska files, the metadata must be provided via -f ffmetadata input
|
||||||
|
cmd2 = [
|
||||||
|
ffmpeg_path, '-y',
|
||||||
|
'-i', str(output), # Input: merged audio
|
||||||
|
'-i', str(metadata_file), # Input: FFMetadata file
|
||||||
|
'-c:a', 'copy', # Copy audio without re-encoding
|
||||||
|
'-threads', '0', # Use all threads
|
||||||
|
'-map', '0', # Map all from first input
|
||||||
|
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
|
||||||
|
str(temp_output) # Output
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run chapter embedding silently (progress handled by worker thread)
|
||||||
|
_subprocess.run(
|
||||||
|
cmd2,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
stdin=_subprocess.DEVNULL,
|
||||||
|
timeout=600,
|
||||||
|
check=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Replace original with temp if successful
|
||||||
|
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||||
|
try:
|
||||||
|
import shutil
|
||||||
|
if output.exists():
|
||||||
|
output.unlink()
|
||||||
|
shutil.move(str(temp_output), str(output))
|
||||||
|
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Chapters embedded successfully")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||||
|
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
temp_output.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
logger.warning(f"[merge-file] Chapter embedding did not create output")
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
|
||||||
|
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||||
|
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
|
||||||
|
# MP4/M4A format has native chapter support via iTunes metadata atoms
|
||||||
|
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
|
||||||
|
|
||||||
|
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
|
||||||
|
|
||||||
|
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
|
||||||
|
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
|
||||||
|
cmd2 = [
|
||||||
|
ffmpeg_path, '-y',
|
||||||
|
'-i', str(output), # Input: merged audio
|
||||||
|
'-i', str(metadata_file), # Input: FFMetadata file
|
||||||
|
'-c:a', 'copy', # Copy audio without re-encoding
|
||||||
|
'-threads', '0', # Use all threads
|
||||||
|
'-map', '0', # Map all from first input
|
||||||
|
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
|
||||||
|
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
|
||||||
|
str(temp_output) # Output
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run MP4 chapter embedding silently (progress handled by worker thread)
|
||||||
|
_subprocess.run(
|
||||||
|
cmd2,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
stdin=_subprocess.DEVNULL,
|
||||||
|
timeout=600,
|
||||||
|
check=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Replace original with temp if successful
|
||||||
|
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||||
|
try:
|
||||||
|
import shutil
|
||||||
|
if output.exists():
|
||||||
|
output.unlink()
|
||||||
|
shutil.move(str(temp_output), str(output))
|
||||||
|
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
|
||||||
|
logger.info(f"[merge-file] MP4 chapters embedded successfully")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||||
|
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
temp_output.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
|
||||||
|
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
# For other formats, chapters would require external tools
|
||||||
|
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
|
||||||
|
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
|
||||||
|
|
||||||
|
# Clean up temp files
|
||||||
|
try:
|
||||||
|
concat_file.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
metadata_file.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Audio merge error: {e}", file=sys.stderr)
|
||||||
|
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
|
||||||
|
"""Merge video files."""
|
||||||
|
ffmpeg_path = _shutil.which('ffmpeg')
|
||||||
|
if not ffmpeg_path:
|
||||||
|
log("ffmpeg not found in PATH", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create concat demuxer file
|
||||||
|
concat_file = output.parent / f".concat_{output.stem}.txt"
|
||||||
|
concat_lines = []
|
||||||
|
for f in files:
|
||||||
|
safe_path = str(f).replace("'", "'\\''")
|
||||||
|
concat_lines.append(f"file '{safe_path}'")
|
||||||
|
|
||||||
|
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
|
||||||
|
|
||||||
|
# Build FFmpeg command for video merge
|
||||||
|
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
|
||||||
|
|
||||||
|
# Video codec selection
|
||||||
|
if output_format == 'mp4':
|
||||||
|
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
|
||||||
|
elif output_format == 'mkv':
|
||||||
|
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
|
||||||
|
else:
|
||||||
|
cmd.extend(['-c', 'copy']) # Copy without re-encoding
|
||||||
|
|
||||||
|
cmd.append(str(output))
|
||||||
|
|
||||||
|
log(f"Merging {len(files)} video files...", file=sys.stderr)
|
||||||
|
result = _subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
|
||||||
|
# Clean up concat file
|
||||||
|
try:
|
||||||
|
concat_file.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if result.returncode != 0:
|
||||||
|
stderr = (result.stderr or '').strip()
|
||||||
|
log(f"FFmpeg error: {stderr}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Video merge error: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_text(files: List[Path], output: Path) -> bool:
|
||||||
|
"""Merge text files."""
|
||||||
|
try:
|
||||||
|
with open(output, 'w', encoding='utf-8') as outf:
|
||||||
|
for i, f in enumerate(files):
|
||||||
|
if i > 0:
|
||||||
|
outf.write('\n---\n') # Separator between files
|
||||||
|
try:
|
||||||
|
content = f.read_text(encoding='utf-8', errors='replace')
|
||||||
|
outf.write(content)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Text merge error: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_pdf(files: List[Path], output: Path) -> bool:
|
||||||
|
"""Merge PDF files."""
|
||||||
|
if not HAS_PYPDF2:
|
||||||
|
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
writer = PdfWriter()
|
||||||
|
|
||||||
|
for f in files:
|
||||||
|
try:
|
||||||
|
reader = PdfReader(f)
|
||||||
|
for page in reader.pages:
|
||||||
|
writer.add_page(page)
|
||||||
|
log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
with open(output, 'wb') as outf:
|
||||||
|
writer.write(outf)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"PDF merge error: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
    name="merge-file",
    summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
    usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|m4a|aac|opus|mka|mp4|mkv|pdf|txt>]",
    args=[
        CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
        CmdletArg("-output", description="Override output file path."),
        CmdletArg("-format", description="Output format (auto/mp3/m4a/aac/opus/mka/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
    ],
    details=[
        "- Pipe multiple files: search-file query | [1,2,3] | merge-file",
        "- Audio files merge with minimal quality loss using specified codec.",
        "- Video files merge into MP4 or MKV containers.",
        "- PDF files merge into a single PDF document.",
        "- Text/document files are concatenated.",
        "- Output name derived from first file with ' (merged)' suffix.",
        "- -delete flag removes all source files after successful merge.",
    ],
)
335
cmdlets/pipe.py
Normal file
@@ -0,0 +1,335 @@
from typing import Any, Dict, Sequence, List, Optional
import sys
import json
import platform
import socket
import re
import subprocess
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
from .get_file import _get_fixed_ipc_pipe
import pipeline as ctx


def _send_ipc_command(command: Dict[str, Any]) -> Optional[Any]:
    """Send a command to the MPV IPC pipe and return the response."""
    ipc_pipe = _get_fixed_ipc_pipe()
    request = json.dumps(command) + "\n"

    try:
        if platform.system() == 'Windows':
            # Windows named pipe
            # Opening in r+b mode to read the response
            try:
                with open(ipc_pipe, 'r+b', buffering=0) as pipe:
                    pipe.write(request.encode('utf-8'))
                    pipe.flush()

                    # Read response
                    # Try to read a line; this might block if MPV is unresponsive.
                    response_line = pipe.readline()
                    if response_line:
                        return json.loads(response_line.decode('utf-8'))
            except FileNotFoundError:
                return None  # MPV not running
            except Exception as e:
                log(f"Windows IPC Error: {e}", file=sys.stderr)
                return None
        else:
            # Unix socket
            af_unix = getattr(socket, 'AF_UNIX', None)
            if af_unix is None:
                log("Unix sockets not supported on this platform", file=sys.stderr)
                return None

            try:
                sock = socket.socket(af_unix, socket.SOCK_STREAM)
                sock.settimeout(2.0)
                sock.connect(ipc_pipe)
                sock.sendall(request.encode('utf-8'))

                # Read response
                response_data = b""
                while True:
                    try:
                        chunk = sock.recv(4096)
                        if not chunk:
                            break
                        response_data += chunk
                        if b"\n" in chunk:
                            break
                    except socket.timeout:
                        break

                sock.close()

                if response_data:
                    # Parse lines, look for the response to our request
                    lines = response_data.decode('utf-8').strip().split('\n')
                    for line in lines:
                        try:
                            resp = json.loads(line)
                            # If it has an 'error' field, it's a response
                            if 'error' in resp:
                                return resp
                        except Exception:
                            pass
            except (FileNotFoundError, ConnectionRefusedError):
                return None  # MPV not running
            except Exception as e:
                log(f"Unix IPC Error: {e}", file=sys.stderr)
                return None

    except Exception as e:
        log(f"IPC Error: {e}", file=sys.stderr)
        return None

    return None


def _get_playlist() -> List[Dict[str, Any]]:
    """Get the current playlist from MPV."""
    cmd = {"command": ["get_property", "playlist"], "request_id": 100}
    resp = _send_ipc_command(cmd)
    if resp and resp.get("error") == "success":
        return resp.get("data", [])
    return []

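# Wire-format sketch of one IPC round trip (one JSON object per line):
#   request:  {"command": ["get_property", "playlist"], "request_id": 100}
#   response: {"data": [{"filename": "...", "current": true}], "request_id": 100, "error": "success"}
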
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Manage and play items in the MPV playlist via IPC."""
|
||||||
|
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
|
||||||
|
# Handle positional index argument if provided
|
||||||
|
index_arg = parsed.get("index")
|
||||||
|
|
||||||
|
clear_mode = parsed.get("clear")
|
||||||
|
list_mode = parsed.get("list")
|
||||||
|
|
||||||
|
# Handle piped input (add to playlist)
|
||||||
|
if result:
|
||||||
|
# If result is a list of items, add them to playlist
|
||||||
|
items_to_add = []
|
||||||
|
if isinstance(result, list):
|
||||||
|
items_to_add = result
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
items_to_add = [result]
|
||||||
|
|
||||||
|
added_count = 0
|
||||||
|
for i, item in enumerate(items_to_add):
|
||||||
|
# Extract URL/Path
|
||||||
|
target = None
|
||||||
|
title = None
|
||||||
|
|
||||||
|
if isinstance(item, dict):
|
||||||
|
target = item.get("target") or item.get("url") or item.get("path")
|
||||||
|
title = item.get("title") or item.get("name")
|
||||||
|
elif hasattr(item, "target"):
|
||||||
|
target = item.target
|
||||||
|
title = getattr(item, "title", None)
|
||||||
|
elif isinstance(item, str):
|
||||||
|
target = item
|
||||||
|
|
||||||
|
if target:
|
||||||
|
# Add to MPV playlist
|
||||||
|
# We use loadfile with append flag
|
||||||
|
# Configure 1080p limit for streams (bestvideo<=1080p + bestaudio)
|
||||||
|
options = {
|
||||||
|
"ytdl-format": "bestvideo[height<=?1080]+bestaudio/best[height<=?1080]"
|
||||||
|
}
|
||||||
|
|
||||||
|
if title:
|
||||||
|
options["force-media-title"] = title
|
||||||
|
|
||||||
|
cmd = {"command": ["loadfile", target, "append", options], "request_id": 200}
|
||||||
|
resp = _send_ipc_command(cmd)
|
||||||
|
|
||||||
|
if resp is None:
|
||||||
|
# MPV not running (or died)
|
||||||
|
# Start MPV with remaining items
|
||||||
|
_start_mpv(items_to_add[i:])
|
||||||
|
return 0
|
||||||
|
elif resp.get("error") == "success":
|
||||||
|
added_count += 1
|
||||||
|
if title:
|
||||||
|
log(f"Queued: {title}")
|
||||||
|
else:
|
||||||
|
log(f"Queued: {target}")
|
||||||
|
|
||||||
|
if added_count > 0:
|
||||||
|
# If we added items, we might want to play the first one if nothing is playing?
|
||||||
|
# For now, just list the playlist
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Get playlist from MPV
|
||||||
|
items = _get_playlist()
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
log("MPV playlist is empty or MPV is not running.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# If index is provided, perform action (Play or Clear)
|
||||||
|
if index_arg is not None:
|
||||||
|
try:
|
||||||
|
# Handle 1-based index
|
||||||
|
idx = int(index_arg) - 1
|
||||||
|
|
||||||
|
if idx < 0 or idx >= len(items):
|
||||||
|
log(f"Index {index_arg} out of range (1-{len(items)}).")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
item = items[idx]
|
||||||
|
title = item.get("title") or item.get("filename") or "Unknown"
|
||||||
|
|
||||||
|
if clear_mode:
|
||||||
|
# Remove item
|
||||||
|
cmd = {"command": ["playlist-remove", idx], "request_id": 101}
|
||||||
|
resp = _send_ipc_command(cmd)
|
||||||
|
if resp and resp.get("error") == "success":
|
||||||
|
log(f"Removed: {title}")
|
||||||
|
# Refresh items for listing
|
||||||
|
items = _get_playlist()
|
||||||
|
list_mode = True
|
||||||
|
index_arg = None
|
||||||
|
else:
|
||||||
|
log(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
# Play item
|
||||||
|
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
|
||||||
|
resp = _send_ipc_command(cmd)
|
||||||
|
if resp and resp.get("error") == "success":
|
||||||
|
log(f"Playing: {title}")
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
log(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
except ValueError:
|
||||||
|
log(f"Invalid index: {index_arg}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# List items (Default action or after clear)
|
||||||
|
if list_mode or index_arg is None:
|
||||||
|
if not items:
|
||||||
|
log("MPV playlist is empty.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
table = ResultTable("MPV Playlist")
|
||||||
|
|
||||||
|
for i, item in enumerate(items):
|
||||||
|
is_current = item.get("current", False)
|
||||||
|
title = item.get("title") or ""
|
||||||
|
filename = item.get("filename") or ""
|
||||||
|
|
||||||
|
# Special handling for memory:// M3U playlists (used to pass titles via IPC)
|
||||||
|
if "memory://" in filename and "#EXTINF:" in filename:
|
||||||
|
try:
|
||||||
|
# Extract title from #EXTINF:-1,Title
|
||||||
|
# Use regex to find title between #EXTINF:-1, and newline
|
||||||
|
match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename)
|
||||||
|
if match:
|
||||||
|
extracted_title = match.group(1).strip()
|
||||||
|
if not title or title == "memory://":
|
||||||
|
title = extracted_title
|
||||||
|
|
||||||
|
# Extract actual URL
|
||||||
|
# Find the first line that looks like a URL and not a directive
|
||||||
|
lines = filename.splitlines()
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if line and not line.startswith('#') and not line.startswith('memory://'):
|
||||||
|
filename = line
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Truncate if too long
|
||||||
|
if len(title) > 57:
|
||||||
|
title = title[:57] + "..."
|
||||||
|
if len(filename) > 27:
|
||||||
|
filename = filename[:27] + "..."
|
||||||
|
|
||||||
|
row = table.add_row()
|
||||||
|
row.add_column("#", str(i + 1))
|
||||||
|
row.add_column("Current", "*" if is_current else "")
|
||||||
|
row.add_column("Title", title)
|
||||||
|
row.add_column("Filename", filename)
|
||||||
|
|
||||||
|
table.set_row_selection_args(i, [str(i + 1)])
|
||||||
|
|
||||||
|
table.set_source_command(".pipe")
|
||||||
|
|
||||||
|
# Register results with pipeline context so @N selection works
|
||||||
|
ctx.set_last_result_table_overlay(table, items)
|
||||||
|
ctx.set_current_stage_table(table)
|
||||||
|
|
||||||
|
print(table)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _start_mpv(items: List[Any]) -> None:
|
||||||
|
"""Start MPV with a list of items."""
|
||||||
|
ipc_pipe = _get_fixed_ipc_pipe()
|
||||||
|
|
||||||
|
cmd = ['mpv', f'--input-ipc-server={ipc_pipe}']
|
||||||
|
cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]')
|
||||||
|
|
||||||
|
# Add items
|
||||||
|
first_title_set = False
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
target = None
|
||||||
|
title = None
|
||||||
|
|
||||||
|
if isinstance(item, dict):
|
||||||
|
target = item.get("target") or item.get("url") or item.get("path")
|
||||||
|
title = item.get("title") or item.get("name")
|
||||||
|
elif hasattr(item, "target"):
|
||||||
|
target = item.target
|
||||||
|
title = getattr(item, "title", None)
|
||||||
|
elif isinstance(item, str):
|
||||||
|
target = item
|
||||||
|
|
||||||
|
if target:
|
||||||
|
if not first_title_set and title:
|
||||||
|
cmd.append(f'--force-media-title={title}')
|
||||||
|
first_title_set = True
|
||||||
|
cmd.append(target)
|
||||||
|
|
||||||
|
if len(cmd) > 3: # mpv + ipc + format + at least one file
|
||||||
|
try:
|
||||||
|
kwargs = {}
|
||||||
|
if platform.system() == 'Windows':
|
||||||
|
kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS
|
||||||
|
|
||||||
|
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
|
||||||
|
log(f"Started MPV with {len(cmd)-3} items")
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error starting MPV: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name=".pipe",
|
||||||
|
aliases=["pipe", "playlist", "queue", "ls-pipe"],
|
||||||
|
summary="Manage and play items in the MPV playlist via IPC",
|
||||||
|
usage=".pipe [index] [-clear]",
|
||||||
|
args=[
|
||||||
|
CmdletArg(
|
||||||
|
name="index",
|
||||||
|
type="int",
|
||||||
|
description="Index of item to play or clear",
|
||||||
|
required=False
|
||||||
|
),
|
||||||
|
CmdletArg(
|
||||||
|
name="clear",
|
||||||
|
type="flag",
|
||||||
|
description="Remove the selected item from the playlist"
|
||||||
|
),
|
||||||
|
CmdletArg(
|
||||||
|
name="list",
|
||||||
|
type="flag",
|
||||||
|
description="List items (default)"
|
||||||
|
),
|
||||||
|
],
|
||||||
|
exec=_run
|
||||||
|
)
|
||||||
|
|
||||||
739
cmdlets/screen_shot.py
Normal file
@@ -0,0 +1,739 @@
"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline.

This cmdlet processes files through the pipeline and creates screenshots using
Playwright, marking them as temporary artifacts for cleanup.
"""

from __future__ import annotations

import contextlib
import hashlib
import importlib
import sys
import time
import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlsplit, quote, urljoin

from helper.logger import log
from helper.http_client import HTTPClient

from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input
import models
import pipeline as pipeline_context

# ============================================================================
|
||||||
|
# CMDLET Metadata Declaration
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Playwright & Screenshot Dependencies
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
try:
|
||||||
|
from playwright.sync_api import (
|
||||||
|
TimeoutError as PlaywrightTimeoutError,
|
||||||
|
ViewportSize,
|
||||||
|
sync_playwright,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
raise RuntimeError(
|
||||||
|
"playwright is required for screenshot capture; install with 'pip install playwright'"
|
||||||
|
) from exc
|
||||||
|
|
||||||
|
try:
|
||||||
|
from config import resolve_output_dir
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
_parent_dir = str(Path(__file__).parent.parent)
|
||||||
|
if _parent_dir not in sys.path:
|
||||||
|
sys.path.insert(0, _parent_dir)
|
||||||
|
from config import resolve_output_dir
|
||||||
|
except ImportError:
|
||||||
|
resolve_output_dir = None
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Screenshot Constants & Configuration
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
USER_AGENT = (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
|
||||||
|
ARCHIVE_TIMEOUT = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenshotError(RuntimeError):
|
||||||
|
"""Raised when screenshot capture or upload fails."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class ScreenshotOptions:
|
||||||
|
"""Options controlling screenshot capture and post-processing."""
|
||||||
|
|
||||||
|
url: str
|
||||||
|
output_dir: Path
|
||||||
|
output_path: Optional[Path] = None
|
||||||
|
full_page: bool = True
|
||||||
|
headless: bool = True
|
||||||
|
wait_after_load: float = 2.0
|
||||||
|
wait_for_article: bool = False
|
||||||
|
replace_video_posters: bool = True
|
||||||
|
tags: Sequence[str] = ()
|
||||||
|
archive: bool = False
|
||||||
|
archive_timeout: float = ARCHIVE_TIMEOUT
|
||||||
|
known_urls: Sequence[str] = ()
|
||||||
|
output_format: Optional[str] = None
|
||||||
|
prefer_platform_target: bool = False
|
||||||
|
target_selectors: Optional[Sequence[str]] = None
|
||||||
|
selector_timeout_ms: int = 10_000
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class ScreenshotResult:
|
||||||
|
"""Details about the captured screenshot."""
|
||||||
|
|
||||||
|
path: Path
|
||||||
|
url: str
|
||||||
|
tags_applied: List[str]
|
||||||
|
archive_urls: List[str]
|
||||||
|
known_urls: List[str]
|
||||||
|
warnings: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Helper Functions
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def _ensure_directory(path: Path) -> None:
|
||||||
|
"""Ensure directory exists."""
|
||||||
|
if not isinstance(path, Path):
|
||||||
|
path = Path(path)
|
||||||
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_path(path: Path) -> Path:
|
||||||
|
"""Get unique path by appending numbers if file exists."""
|
||||||
|
if not path.exists():
|
||||||
|
return path
|
||||||
|
stem = path.stem
|
||||||
|
suffix = path.suffix
|
||||||
|
parent = path.parent
|
||||||
|
counter = 1
|
||||||
|
while True:
|
||||||
|
new_path = parent / f"{stem}_{counter}{suffix}"
|
||||||
|
if not new_path.exists():
|
||||||
|
return new_path
|
||||||
|
counter += 1
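# Illustrative example (not part of the original source): if "shot.png" already
# exists in the target directory, _unique_path returns "shot_1.png", then
# "shot_2.png" on the next collision, and so on.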
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_preserve_order(items: Sequence[str]) -> List[str]:
|
||||||
|
"""Remove duplicates while preserving order."""
|
||||||
|
seen = set()
|
||||||
|
result = []
|
||||||
|
for item in items:
|
||||||
|
if item not in seen:
|
||||||
|
seen.add(item)
|
||||||
|
result.append(item)
|
||||||
|
return result
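# For example, _unique_preserve_order(["a", "b", "a", "c"]) returns ["a", "b", "c"].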
|
||||||
|
|
||||||
|
|
||||||
|
def _slugify_url(url: str) -> str:
|
||||||
|
"""Convert URL to filesystem-safe slug."""
|
||||||
|
parsed = urlsplit(url)
|
||||||
|
candidate = f"{parsed.netloc}{parsed.path}"
|
||||||
|
if parsed.query:
|
||||||
|
candidate += f"?{parsed.query}"
|
||||||
|
slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
|
||||||
|
slug = slug.strip("-") or "screenshot"
|
||||||
|
return slug[:100]
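# Illustrative example: _slugify_url("https://example.com/page?id=1") yields
# "example-com-page-id-1" (non-alphanumeric characters are replaced with "-",
# the result is lowercased and truncated to 100 characters).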
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_format(fmt: Optional[str]) -> str:
|
||||||
|
"""Normalize output format to valid values."""
|
||||||
|
if not fmt:
|
||||||
|
return "png"
|
||||||
|
value = fmt.strip().lower()
|
||||||
|
if value in {"jpg", "jpeg"}:
|
||||||
|
return "jpeg"
|
||||||
|
if value in {"png", "pdf"}:
|
||||||
|
return value
|
||||||
|
return "png"
|
||||||
|
|
||||||
|
|
||||||
|
def _format_suffix(fmt: str) -> str:
|
||||||
|
"""Get file suffix for format."""
|
||||||
|
if fmt == "jpeg":
|
||||||
|
return ".jpg"
|
||||||
|
return f".{fmt}"
|
||||||
|
|
||||||
|
|
||||||
|
def _selectors_for_url(url: str) -> List[str]:
|
||||||
|
"""Return a list of likely content selectors for known platforms."""
|
||||||
|
u = url.lower()
|
||||||
|
sels: List[str] = []
|
||||||
|
# Twitter/X
|
||||||
|
if "twitter.com" in u or "x.com" in u:
|
||||||
|
sels.extend([
|
||||||
|
"article[role='article']",
|
||||||
|
"div[data-testid='tweet']",
|
||||||
|
"div[data-testid='cellInnerDiv'] article",
|
||||||
|
])
|
||||||
|
# Instagram
|
||||||
|
if "instagram.com" in u:
|
||||||
|
sels.extend([
|
||||||
|
"article[role='presentation']",
|
||||||
|
"article[role='article']",
|
||||||
|
"div[role='dialog'] article",
|
||||||
|
"section main article",
|
||||||
|
])
|
||||||
|
# Reddit
|
||||||
|
if "reddit.com" in u:
|
||||||
|
sels.extend([
|
||||||
|
"shreddit-post",
|
||||||
|
"div[data-testid='post-container']",
|
||||||
|
"div[data-click-id='background']",
|
||||||
|
"article",
|
||||||
|
])
|
||||||
|
# Rumble (video post)
|
||||||
|
if "rumble.com" in u:
|
||||||
|
sels.extend([
|
||||||
|
"rumble-player, iframe.rumble",
|
||||||
|
"div.video-item--main",
|
||||||
|
"main article",
|
||||||
|
])
|
||||||
|
return sels or ["article"]
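# For instance, a twitter.com/x.com URL returns the tweet article selectors
# listed above, while an unrecognised domain falls back to the generic
# ["article"] selector.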
|
||||||
|
|
||||||
|
|
||||||
|
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
|
||||||
|
"""Best-effort page tweaks for popular platforms before capture."""
|
||||||
|
u = url.lower()
|
||||||
|
|
||||||
|
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
|
||||||
|
clicks = 0
|
||||||
|
for _ in range(max(1, passes)):
|
||||||
|
for t in texts:
|
||||||
|
try:
|
||||||
|
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
|
||||||
|
clicks += 1
|
||||||
|
except PlaywrightTimeoutError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
time.sleep(0.1)
|
||||||
|
return clicks
|
||||||
|
|
||||||
|
# Dismiss common cookie/consent prompts
|
||||||
|
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
|
||||||
|
|
||||||
|
# Platform-specific expansions
|
||||||
|
if "reddit.com" in u:
|
||||||
|
_try_click_texts(["see more", "read more", "show more", "more"])
|
||||||
|
if ("twitter.com" in u) or ("x.com" in u):
|
||||||
|
_try_click_texts(["show more", "more"])
|
||||||
|
if "instagram.com" in u:
|
||||||
|
_try_click_texts(["more", "see more"])
|
||||||
|
if "tiktok.com" in u:
|
||||||
|
_try_click_texts(["more", "see more"])
|
||||||
|
if ("facebook.com" in u) or ("fb.watch" in u):
|
||||||
|
_try_click_texts(["see more", "show more", "more"])
|
||||||
|
if "rumble.com" in u:
|
||||||
|
_try_click_texts(["accept", "agree", "close"])
|
||||||
|
|
||||||
|
|
||||||
|
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
|
||||||
|
"""Submit URL to Internet Archive Wayback Machine."""
|
||||||
|
encoded = quote(url, safe="/:?=&")
|
||||||
|
with HTTPClient() as client:
|
||||||
|
response = client.get(f"https://web.archive.org/save/{encoded}")
|
||||||
|
response.raise_for_status()
|
||||||
|
content_location = response.headers.get("Content-Location")
|
||||||
|
if content_location:
|
||||||
|
return urljoin("https://web.archive.org", content_location)
|
||||||
|
return str(response.url)
|
||||||
|
|
||||||
|
|
||||||
|
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
|
||||||
|
"""Submit URL to Archive.today."""
|
||||||
|
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||||
|
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||||
|
response = client.get(f"https://archive.today/submit/?url={encoded}")
|
||||||
|
response.raise_for_status()
|
||||||
|
final = str(response.url)
|
||||||
|
if final and ("archive.today" in final or "archive.ph" in final):
|
||||||
|
return final
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
|
||||||
|
"""Submit URL to Archive.ph."""
|
||||||
|
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||||
|
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||||
|
response = client.get(f"https://archive.ph/submit/?url={encoded}")
|
||||||
|
response.raise_for_status()
|
||||||
|
final = str(response.url)
|
||||||
|
if final and "archive.ph" in final:
|
||||||
|
return final
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
|
||||||
|
"""Submit URL to all available archive services."""
|
||||||
|
archives: List[str] = []
|
||||||
|
warnings: List[str] = []
|
||||||
|
for submitter, label in (
|
||||||
|
(_submit_wayback, "wayback"),
|
||||||
|
(_submit_archive_today, "archive.today"),
|
||||||
|
(_submit_archive_ph, "archive.ph"),
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
log(f"Archiving to {label}...", flush=True)
|
||||||
|
archived = submitter(url, timeout)
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
if exc.response.status_code == 429:
|
||||||
|
warnings.append(f"archive {label} rate limited (HTTP 429)")
|
||||||
|
log(f"{label}: Rate limited (HTTP 429)", flush=True)
|
||||||
|
else:
|
||||||
|
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
|
||||||
|
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
|
||||||
|
except httpx.RequestError as exc:
|
||||||
|
warnings.append(f"archive {label} failed: {exc}")
|
||||||
|
log(f"{label}: Connection error: {exc}", flush=True)
|
||||||
|
except Exception as exc:
|
||||||
|
warnings.append(f"archive {label} failed: {exc}")
|
||||||
|
log(f"{label}: {exc}", flush=True)
|
||||||
|
else:
|
||||||
|
if archived:
|
||||||
|
archives.append(archived)
|
||||||
|
log(f"{label}: Success - {archived}", flush=True)
|
||||||
|
else:
|
||||||
|
log(f"{label}: No archive link returned", flush=True)
|
||||||
|
return archives, warnings
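# Hypothetical example of the return value: if only the Wayback submission
# succeeds, the result is roughly
# (["https://web.archive.org/web/<timestamp>/https://example.com"], [])
# plus one warning entry for each service that raised an error.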
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_output_path(options: ScreenshotOptions) -> Path:
|
||||||
|
"""Prepare and validate output path for screenshot."""
|
||||||
|
_ensure_directory(options.output_dir)
|
||||||
|
explicit_format = _normalise_format(options.output_format) if options.output_format else None
|
||||||
|
inferred_format: Optional[str] = None
|
||||||
|
if options.output_path is not None:
|
||||||
|
path = options.output_path
|
||||||
|
if not path.is_absolute():
|
||||||
|
path = options.output_dir / path
|
||||||
|
suffix = path.suffix.lower()
|
||||||
|
if suffix:
|
||||||
|
inferred_format = _normalise_format(suffix[1:])
|
||||||
|
else:
|
||||||
|
stamp = time.strftime("%Y%m%d_%H%M%S")
|
||||||
|
filename = f"{_slugify_url(options.url)}_{stamp}"
|
||||||
|
path = options.output_dir / filename
|
||||||
|
final_format = explicit_format or inferred_format or "png"
|
||||||
|
if not path.suffix:
|
||||||
|
path = path.with_suffix(_format_suffix(final_format))
|
||||||
|
else:
|
||||||
|
current_suffix = path.suffix.lower()
|
||||||
|
expected = _format_suffix(final_format)
|
||||||
|
if current_suffix != expected:
|
||||||
|
path = path.with_suffix(expected)
|
||||||
|
options.output_format = final_format
|
||||||
|
return _unique_path(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_with_playwright(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
|
||||||
|
"""Capture screenshot using Playwright."""
|
||||||
|
playwright = None
|
||||||
|
browser = None
|
||||||
|
context = None
|
||||||
|
try:
|
||||||
|
log("Starting Playwright...", flush=True)
|
||||||
|
playwright = sync_playwright().start()
|
||||||
|
log("Launching Chromium browser...", flush=True)
|
||||||
|
format_name = _normalise_format(options.output_format)
|
||||||
|
headless = options.headless or format_name == "pdf"
|
||||||
|
if format_name == "pdf" and not options.headless:
|
||||||
|
warnings.append("pdf output requires headless Chromium; overriding headless mode")
|
||||||
|
browser = playwright.chromium.launch(
|
||||||
|
headless=headless,
|
||||||
|
args=["--disable-blink-features=AutomationControlled"],
|
||||||
|
)
|
||||||
|
log("Creating browser context...", flush=True)
|
||||||
|
context = browser.new_context(
|
||||||
|
user_agent=USER_AGENT,
|
||||||
|
viewport=DEFAULT_VIEWPORT,
|
||||||
|
ignore_https_errors=True,
|
||||||
|
)
|
||||||
|
page = context.new_page()
|
||||||
|
log(f"Navigating to {options.url}...", flush=True)
|
||||||
|
try:
|
||||||
|
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
|
||||||
|
log("Page loaded successfully", flush=True)
|
||||||
|
except PlaywrightTimeoutError:
|
||||||
|
warnings.append("navigation timeout; capturing current page state")
|
||||||
|
log("Navigation timeout; proceeding with current state", flush=True)
|
||||||
|
|
||||||
|
# Skip article lookup by default (wait_for_article defaults to False)
|
||||||
|
if options.wait_for_article:
|
||||||
|
try:
|
||||||
|
log("Waiting for article element...", flush=True)
|
||||||
|
page.wait_for_selector("article", timeout=10_000)
|
||||||
|
log("Article element found", flush=True)
|
||||||
|
except PlaywrightTimeoutError:
|
||||||
|
warnings.append("<article> selector not found; capturing fallback")
|
||||||
|
log("Article element not found; using fallback", flush=True)
|
||||||
|
|
||||||
|
if options.wait_after_load > 0:
|
||||||
|
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
|
||||||
|
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||||
|
if options.replace_video_posters:
|
||||||
|
log("Replacing video elements with posters...", flush=True)
|
||||||
|
page.evaluate(
|
||||||
|
"""
|
||||||
|
document.querySelectorAll('video').forEach(v => {
|
||||||
|
if (v.poster) {
|
||||||
|
const img = document.createElement('img');
|
||||||
|
img.src = v.poster;
|
||||||
|
img.style.maxWidth = '100%';
|
||||||
|
img.style.borderRadius = '12px';
|
||||||
|
v.replaceWith(img);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
# Attempt platform-specific target capture if requested (and not PDF)
|
||||||
|
element_captured = False
|
||||||
|
if options.prefer_platform_target and format_name != "pdf":
|
||||||
|
log("Attempting platform-specific content capture...", flush=True)
|
||||||
|
try:
|
||||||
|
_platform_preprocess(options.url, page, warnings)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
selectors = list(options.target_selectors or [])
|
||||||
|
if not selectors:
|
||||||
|
selectors = _selectors_for_url(options.url)
|
||||||
|
for sel in selectors:
|
||||||
|
try:
|
||||||
|
log(f"Trying selector: {sel}", flush=True)
|
||||||
|
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
|
||||||
|
except PlaywrightTimeoutError:
|
||||||
|
log(f"Selector not found: {sel}", flush=True)
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
if el is not None:
|
||||||
|
log(f"Found element with selector: {sel}", flush=True)
|
||||||
|
try:
|
||||||
|
el.scroll_into_view_if_needed(timeout=1000)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
log(f"Capturing element to {destination}...", flush=True)
|
||||||
|
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
|
||||||
|
element_captured = True
|
||||||
|
log("Element captured successfully", flush=True)
|
||||||
|
break
|
||||||
|
except Exception as exc:
|
||||||
|
warnings.append(f"element capture failed for '{sel}': {exc}")
|
||||||
|
log(f"Failed to capture element: {exc}", flush=True)
|
||||||
|
# Fallback to default capture paths
|
||||||
|
if element_captured:
|
||||||
|
pass
|
||||||
|
elif format_name == "pdf":
|
||||||
|
log("Generating PDF...", flush=True)
|
||||||
|
page.emulate_media(media="print")
|
||||||
|
page.pdf(path=str(destination), print_background=True)
|
||||||
|
log(f"PDF saved to {destination}", flush=True)
|
||||||
|
else:
|
||||||
|
log(f"Capturing full page to {destination}...", flush=True)
|
||||||
|
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
|
||||||
|
if format_name == "jpeg":
|
||||||
|
screenshot_kwargs["type"] = "jpeg"
|
||||||
|
screenshot_kwargs["quality"] = 90
|
||||||
|
if options.full_page:
|
||||||
|
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||||
|
else:
|
||||||
|
article = page.query_selector("article")
|
||||||
|
if article is not None:
|
||||||
|
article_kwargs = dict(screenshot_kwargs)
|
||||||
|
article_kwargs.pop("full_page", None)
|
||||||
|
article.screenshot(**article_kwargs)
|
||||||
|
else:
|
||||||
|
page.screenshot(**screenshot_kwargs)
|
||||||
|
log(f"Screenshot saved to {destination}", flush=True)
|
||||||
|
except Exception as exc:
|
||||||
|
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
|
||||||
|
finally:
|
||||||
|
log("Cleaning up browser resources...", flush=True)
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
if context is not None:
|
||||||
|
context.close()
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
if browser is not None:
|
||||||
|
browser.close()
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
if playwright is not None:
|
||||||
|
playwright.stop()
|
||||||
|
log("Cleanup complete", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
|
||||||
|
"""Capture a screenshot for the given options."""
|
||||||
|
destination = _prepare_output_path(options)
|
||||||
|
warnings: List[str] = []
|
||||||
|
_capture_with_playwright(options, destination, warnings)
|
||||||
|
|
||||||
|
known_urls = _unique_preserve_order([options.url, *options.known_urls])
|
||||||
|
archive_urls: List[str] = []
|
||||||
|
if options.archive:
|
||||||
|
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
|
||||||
|
archive_urls.extend(archives)
|
||||||
|
warnings.extend(archive_warnings)
|
||||||
|
if archives:
|
||||||
|
known_urls = _unique_preserve_order([*known_urls, *archives])
|
||||||
|
|
||||||
|
applied_tags = _unique_preserve_order(list(tag for tag in options.tags if tag.strip()))
|
||||||
|
|
||||||
|
return ScreenshotResult(
|
||||||
|
path=destination,
|
||||||
|
url=options.url,
|
||||||
|
tags_applied=applied_tags,
|
||||||
|
archive_urls=archive_urls,
|
||||||
|
known_urls=known_urls,
|
||||||
|
warnings=warnings,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Main Cmdlet Function
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Take screenshots of URLs in the pipeline.
|
||||||
|
|
||||||
|
Accepts:
|
||||||
|
- Single result object (dict or PipeObject) with 'file_path' field
|
||||||
|
- List of result objects to screenshot each
|
||||||
|
- Direct URL as string
|
||||||
|
|
||||||
|
Emits PipeObject-formatted results for each screenshot with:
|
||||||
|
- action: 'cmdlet:screen-shot'
|
||||||
|
- is_temp: True (screenshots are temporary artifacts)
|
||||||
|
- parent_id: hash of the original file/URL
|
||||||
|
|
||||||
|
Screenshots are created using Playwright and marked as temporary
|
||||||
|
so they can be cleaned up later with the cleanup cmdlet.
|
||||||
|
"""
|
||||||
|
from ._shared import parse_cmdlet_args
|
||||||
|
|
||||||
|
# Help check
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# ARGUMENT PARSING
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
|
parsed = parse_cmdlet_args(args, CMDLET)
|
||||||
|
|
||||||
|
format_value = parsed.get("format")
|
||||||
|
storage_value = parsed.get("storage")
|
||||||
|
selector_arg = parsed.get("selector")
|
||||||
|
selectors = [selector_arg] if selector_arg else []
|
||||||
|
archive_enabled = parsed.get("archive", False)
|
||||||
|
|
||||||
|
# Positional URL argument (if provided)
|
||||||
|
url_arg = parsed.get("url")
|
||||||
|
positional_urls = [str(url_arg)] if url_arg else []
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# INPUT PROCESSING - Extract URLs from pipeline or command arguments
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
|
piped_results = normalize_result_input(result)
|
||||||
|
urls_to_process = []
|
||||||
|
|
||||||
|
# Extract URLs from piped results
|
||||||
|
if piped_results:
|
||||||
|
for item in piped_results:
|
||||||
|
url = None
|
||||||
|
if isinstance(item, dict):
|
||||||
|
url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target')
|
||||||
|
else:
|
||||||
|
url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None)
|
||||||
|
|
||||||
|
if url:
|
||||||
|
urls_to_process.append(str(url))
|
||||||
|
|
||||||
|
# Use positional arguments if no pipeline input
|
||||||
|
if not urls_to_process and positional_urls:
|
||||||
|
urls_to_process = positional_urls
|
||||||
|
|
||||||
|
if not urls_to_process:
|
||||||
|
log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# OUTPUT DIRECTORY RESOLUTION - Priority chain
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
|
screenshot_dir: Optional[Path] = None
|
||||||
|
|
||||||
|
# Primary: Use --storage if provided (highest priority)
|
||||||
|
if storage_value:
|
||||||
|
try:
|
||||||
|
screenshot_dir = SharedArgs.resolve_storage(storage_value)
|
||||||
|
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
|
||||||
|
except ValueError as e:
|
||||||
|
log(str(e), file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Secondary: Use config-based resolver ONLY if --storage not provided
|
||||||
|
if screenshot_dir is None and resolve_output_dir is not None:
|
||||||
|
try:
|
||||||
|
screenshot_dir = resolve_output_dir(config)
|
||||||
|
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
|
||||||
|
if screenshot_dir is None and config and config.get("outfile"):
|
||||||
|
try:
|
||||||
|
screenshot_dir = Path(config["outfile"]).expanduser()
|
||||||
|
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Default: User's Videos directory
|
||||||
|
if screenshot_dir is None:
|
||||||
|
screenshot_dir = Path.home() / "Videos"
|
||||||
|
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
|
||||||
|
|
||||||
|
_ensure_directory(screenshot_dir)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# PREPARE SCREENSHOT OPTIONS
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
|
format_name = _normalise_format(format_value)
|
||||||
|
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
|
||||||
|
target_selectors = filtered_selectors if filtered_selectors else None
|
||||||
|
|
||||||
|
all_emitted = []
|
||||||
|
exit_code = 0
|
||||||
|
# ========================================================================
|
||||||
|
# PROCESS URLs AND CAPTURE SCREENSHOTS
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
|
for url in urls_to_process:
|
||||||
|
# Validate URL format
|
||||||
|
if not url.lower().startswith(("http://", "https://", "file://")):
|
||||||
|
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create screenshot with provided options
|
||||||
|
options = ScreenshotOptions(
|
||||||
|
url=url,
|
||||||
|
output_dir=screenshot_dir,
|
||||||
|
output_format=format_name,
|
||||||
|
archive=archive_enabled,
|
||||||
|
target_selectors=target_selectors,
|
||||||
|
prefer_platform_target=False,
|
||||||
|
wait_for_article=False,
|
||||||
|
full_page=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
screenshot_result = _capture_screenshot(options)
|
||||||
|
|
||||||
|
# Log results and warnings
|
||||||
|
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
|
||||||
|
if screenshot_result.archive_urls:
|
||||||
|
log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True)
|
||||||
|
for warning in screenshot_result.warnings:
|
||||||
|
log(f"Warning: {warning}", flush=True)
|
||||||
|
|
||||||
|
# Compute hash of screenshot file
|
||||||
|
screenshot_hash = None
|
||||||
|
try:
|
||||||
|
with open(screenshot_result.path, 'rb') as f:
|
||||||
|
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Create PipeObject result - marked as TEMP since derivative artifact
|
||||||
|
pipe_obj = create_pipe_object_result(
|
||||||
|
source='screenshot',
|
||||||
|
identifier=Path(screenshot_result.path).stem,
|
||||||
|
file_path=str(screenshot_result.path),
|
||||||
|
cmdlet_name='screen-shot',
|
||||||
|
title=f"Screenshot: {Path(screenshot_result.path).name}",
|
||||||
|
file_hash=screenshot_hash,
|
||||||
|
is_temp=True,
|
||||||
|
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
|
||||||
|
extra={
|
||||||
|
'source_url': url,
|
||||||
|
'archive_urls': screenshot_result.archive_urls,
|
||||||
|
'known_urls': screenshot_result.known_urls,
|
||||||
|
'target': str(screenshot_result.path), # Explicit target for add-file
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Emit the result so downstream cmdlets (like add-file) can use it
|
||||||
|
pipeline_context.emit(pipe_obj)
|
||||||
|
all_emitted.append(pipe_obj)
|
||||||
|
|
||||||
|
except ScreenshotError as exc:
|
||||||
|
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||||
|
exit_code = 1
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
|
exit_code = 1
|
||||||
|
|
||||||
|
if not all_emitted:
|
||||||
|
log(f"No screenshots were successfully captured", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Log completion message
|
||||||
|
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
|
||||||
|
|
||||||
|
return exit_code
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="screen-shot",
|
||||||
|
summary="Capture a screenshot of a URL or file and mark as temporary artifact",
|
||||||
|
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
|
||||||
|
aliases=["screenshot", "ss"],
|
||||||
|
args=[
|
||||||
|
CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
|
||||||
|
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
|
||||||
|
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
|
||||||
|
SharedArgs.ARCHIVE, # Use shared archive argument
|
||||||
|
SharedArgs.STORAGE, # Use shared storage argument
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"Take screenshots of URLs with optional archiving and element targeting.",
|
||||||
|
"Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
|
||||||
|
"",
|
||||||
|
"Arguments:",
|
||||||
|
" url URL to capture (optional if piped from pipeline)",
|
||||||
|
" --format FORMAT Output format: png (default), jpeg, or pdf",
|
||||||
|
" --selector SEL CSS selector for capturing specific element",
|
||||||
|
" --archive, -arch Archive URL to Wayback/Archive.today/Archive.ph",
|
||||||
|
" --storage LOCATION Storage destination: hydrus, local, 0x0, debrid, or ftp",
|
||||||
|
"",
|
||||||
|
"Examples:",
|
||||||
|
" download-data https://example.com | screen-shot --storage local",
|
||||||
|
" download-data https://twitter.com/user/status/123 | screen-shot --selector 'article[role=article]' --storage hydrus --archive",
|
||||||
|
" screen-shot https://example.com --format jpeg --storage 0x0 --archive",
|
||||||
|
]
|
||||||
|
)
|
||||||
351
cmdlets/search_file.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable
|
||||||
|
from fnmatch import fnmatchcase
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log, debug
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from helper.file_storage import FileStorage
|
||||||
|
from helper.search_provider import get_provider, list_providers, SearchResult
|
||||||
|
from metadata import import_pending_sidecars
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
from ._shared import Cmdlet, CmdletArg
|
||||||
|
import models
|
||||||
|
import pipeline as ctx
|
||||||
|
|
||||||
|
# Optional dependencies
|
||||||
|
try:
|
||||||
|
import mutagen # type: ignore
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
mutagen = None # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from config import get_hydrus_url, resolve_output_dir
|
||||||
|
except Exception: # pragma: no cover
|
||||||
|
get_hydrus_url = None # type: ignore
|
||||||
|
resolve_output_dir = None # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from helper.hydrus import HydrusClient, HydrusRequestError
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
HydrusClient = None # type: ignore
|
||||||
|
HydrusRequestError = RuntimeError # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from helper.utils import sha256_file
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
sha256_file = None # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from helper.utils_constant import mime_maps
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
mime_maps = {} # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Data Classes (from helper/search.py)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class SearchRecord:
|
||||||
|
path: str
|
||||||
|
size_bytes: int | None = None
|
||||||
|
duration_seconds: str | None = None
|
||||||
|
tags: str | None = None
|
||||||
|
hash_hex: str | None = None
|
||||||
|
|
||||||
|
def as_dict(self) -> dict[str, str]:
|
||||||
|
payload: dict[str, str] = {"path": self.path}
|
||||||
|
if self.size_bytes is not None:
|
||||||
|
payload["size"] = str(self.size_bytes)
|
||||||
|
if self.duration_seconds:
|
||||||
|
payload["duration"] = self.duration_seconds
|
||||||
|
if self.tags:
|
||||||
|
payload["tags"] = self.tags
|
||||||
|
if self.hash_hex:
|
||||||
|
payload["hash"] = self.hash_hex
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ResultItem:
|
||||||
|
origin: str
|
||||||
|
title: str
|
||||||
|
detail: str
|
||||||
|
annotations: List[str]
|
||||||
|
target: str
|
||||||
|
media_kind: str = "other"
|
||||||
|
hash_hex: Optional[str] = None
|
||||||
|
columns: List[tuple[str, str]] = field(default_factory=list)
|
||||||
|
tag_summary: Optional[str] = None
|
||||||
|
duration_seconds: Optional[float] = None
|
||||||
|
size_bytes: Optional[int] = None
|
||||||
|
full_metadata: Optional[Dict[str, Any]] = None
|
||||||
|
tags: Optional[set[str]] = field(default_factory=set)
|
||||||
|
relationships: Optional[List[str]] = field(default_factory=list)
|
||||||
|
known_urls: Optional[List[str]] = field(default_factory=list)
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"title": self.title,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
|
||||||
|
payload["origin"] = self.origin
|
||||||
|
payload["target"] = self.target
|
||||||
|
payload["media_kind"] = self.media_kind
|
||||||
|
|
||||||
|
# Always include full_metadata if present (needed by download-data, etc)
|
||||||
|
# This is NOT for display, but for downstream processing
|
||||||
|
if self.full_metadata:
|
||||||
|
payload["full_metadata"] = self.full_metadata
|
||||||
|
|
||||||
|
# Include columns if defined (result renderer will use these for display)
|
||||||
|
if self.columns:
|
||||||
|
payload["columns"] = list(self.columns)
|
||||||
|
else:
|
||||||
|
# If no columns, include the detail for backwards compatibility
|
||||||
|
payload["detail"] = self.detail
|
||||||
|
payload["annotations"] = list(self.annotations)
|
||||||
|
|
||||||
|
# Include optional fields
|
||||||
|
if self.hash_hex:
|
||||||
|
payload["hash"] = self.hash_hex
|
||||||
|
if self.tag_summary:
|
||||||
|
payload["tags"] = self.tag_summary
|
||||||
|
if self.tags:
|
||||||
|
payload["tags_set"] = list(self.tags)
|
||||||
|
if self.relationships:
|
||||||
|
payload["relationships"] = self.relationships
|
||||||
|
if self.known_urls:
|
||||||
|
payload["known_urls"] = self.known_urls
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Attach Title/Store columns for storage-origin results to keep CLI display compact."""
|
||||||
|
origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
|
||||||
|
if origin_value not in STORAGE_ORIGINS:
|
||||||
|
return payload
|
||||||
|
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
|
||||||
|
store_label = payload.get("origin") or payload.get("source") or origin_value
|
||||||
|
normalized = dict(payload)
|
||||||
|
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
|
||||||
|
return normalized
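# Illustrative example: a payload like {"origin": "hydrus", "title": "song.flac"}
# gains columns [("Title", "song.flac"), ("Store", "hydrus")], while results from
# non-storage origins (e.g. "libgen") are returned unchanged.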
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name="search-file",
|
||||||
|
summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
|
||||||
|
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
|
||||||
|
args=[
|
||||||
|
CmdletArg("query", description="Search query string"),
|
||||||
|
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
|
||||||
|
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
|
||||||
|
CmdletArg("type", description="Filter by type: audio, video, image, document"),
|
||||||
|
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
|
||||||
|
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
|
||||||
|
CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
|
||||||
|
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
|
||||||
|
"Use -provider to search a specific source, or -storage to search file backends",
|
||||||
|
"Filter results by: tag, size, type, duration",
|
||||||
|
"Results can be piped to other commands",
|
||||||
|
"Examples:",
|
||||||
|
"search-file foo # Search all file backends",
|
||||||
|
"search-file -provider libgen 'python programming' # Search LibGen books",
|
||||||
|
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
|
||||||
|
"search-file 'music' -provider soulseek # Search Soulseek P2P",
|
||||||
|
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
|
||||||
|
"search-file song -storage hydrus -type audio # Search only Hydrus audio",
|
||||||
|
"search-file movie -tag action -provider debrid # Debrid with filters",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@register(["search-file", "search"])
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
|
||||||
|
args_list = [str(arg) for arg in (args or [])]
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
query = ""
|
||||||
|
tag_filters: List[str] = []
|
||||||
|
size_filter: Optional[Tuple[str, int]] = None
|
||||||
|
duration_filter: Optional[Tuple[str, float]] = None
|
||||||
|
type_filter: Optional[str] = None
|
||||||
|
storage_backend: Optional[str] = None
|
||||||
|
provider_name: Optional[str] = None
|
||||||
|
limit = 100
|
||||||
|
|
||||||
|
# Simple argument parsing
|
||||||
|
i = 0
|
||||||
|
while i < len(args_list):
|
||||||
|
arg = args_list[i]
|
||||||
|
low = arg.lower()
|
||||||
|
|
||||||
|
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
|
||||||
|
provider_name = args_list[i + 1].lower()
|
||||||
|
i += 2
|
||||||
|
elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
|
||||||
|
storage_backend = args_list[i + 1].lower()
|
||||||
|
i += 2
|
||||||
|
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
|
||||||
|
tag_filters.append(args_list[i + 1])
|
||||||
|
i += 2
|
||||||
|
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||||
|
try:
|
||||||
|
limit = int(args_list[i + 1])
|
||||||
|
except ValueError:
|
||||||
|
limit = 100
|
||||||
|
i += 2
|
||||||
|
elif low in {"-type", "--type"} and i + 1 < len(args_list):
|
||||||
|
type_filter = args_list[i + 1].lower()
|
||||||
|
i += 2
|
||||||
|
elif not query and not arg.startswith("-"):
|
||||||
|
query = arg
|
||||||
|
i += 1
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
if not query:
|
||||||
|
log("Provide a search query", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Initialize worker for this search command
|
||||||
|
from helper.local_library import LocalLibraryDB
|
||||||
|
from config import get_local_storage_path
|
||||||
|
import uuid
|
||||||
|
worker_id = str(uuid.uuid4())
|
||||||
|
library_root = get_local_storage_path(config or {})
|
||||||
|
if not library_root:
|
||||||
|
log("No library root configured", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
db = LocalLibraryDB(library_root)
|
||||||
|
db.insert_worker(
|
||||||
|
worker_id,
|
||||||
|
"search",
|
||||||
|
title=f"Search: {query}",
|
||||||
|
description=f"Query: {query}",
|
||||||
|
pipe=ctx.get_current_command_text()
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
results_list = []
|
||||||
|
|
||||||
|
# Try to search using provider (libgen, soulseek, debrid, openlibrary)
|
||||||
|
if provider_name:
|
||||||
|
debug(f"[search_file] Attempting provider search with: {provider_name}")
|
||||||
|
provider = get_provider(provider_name, config)
|
||||||
|
if not provider:
|
||||||
|
log(f"Provider '{provider_name}' not available", file=sys.stderr)
|
||||||
|
db.update_worker_status(worker_id, 'error')
|
||||||
|
return 1
|
||||||
|
|
||||||
|
debug(f"[search_file] Provider loaded, calling search with query: {query}")
|
||||||
|
search_result = provider.search(query, limit=limit)
|
||||||
|
debug(f"[search_file] Provider search returned {len(search_result)} results")
|
||||||
|
|
||||||
|
for item in search_result:
|
||||||
|
item_dict = item.to_dict()
|
||||||
|
results_list.append(item_dict)
|
||||||
|
ctx.emit(item_dict)
|
||||||
|
|
||||||
|
debug(f"[search_file] Emitted {len(results_list)} results")
|
||||||
|
|
||||||
|
# Write results to worker stdout
|
||||||
|
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||||
|
db.update_worker_status(worker_id, 'completed')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
|
||||||
|
from helper.file_storage import FileStorage
|
||||||
|
storage = FileStorage(config=config or {})
|
||||||
|
|
||||||
|
backend_to_search = storage_backend or None
|
||||||
|
if backend_to_search:
|
||||||
|
# Check if requested backend is available
|
||||||
|
if backend_to_search == "hydrus":
|
||||||
|
from helper.hydrus import is_hydrus_available
|
||||||
|
if not is_hydrus_available(config or {}):
|
||||||
|
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
|
||||||
|
db.update_worker_status(worker_id, 'error')
|
||||||
|
return 1
|
||||||
|
if not storage.supports_search(backend_to_search):
|
||||||
|
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
|
||||||
|
db.update_worker_status(worker_id, 'error')
|
||||||
|
return 1
|
||||||
|
results = storage[backend_to_search].search(query, limit=limit)
|
||||||
|
else:
|
||||||
|
# Search all searchable backends, but skip hydrus if unavailable
|
||||||
|
from helper.hydrus import is_hydrus_available
|
||||||
|
hydrus_available = is_hydrus_available(config or {})
|
||||||
|
|
||||||
|
all_results = []
|
||||||
|
for backend_name in storage.list_searchable_backends():
|
||||||
|
# Skip hydrus if not available
|
||||||
|
if backend_name == "hydrus" and not hydrus_available:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
|
||||||
|
if backend_results:
|
||||||
|
all_results.extend(backend_results)
|
||||||
|
if len(all_results) >= limit:
|
||||||
|
break
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
|
||||||
|
results = all_results[:limit]
|
||||||
|
|
||||||
|
# Emit results and collect for workers table
|
||||||
|
if results:
|
||||||
|
for item in results:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
normalized = _ensure_storage_columns(item)
|
||||||
|
results_list.append(normalized)
|
||||||
|
ctx.emit(normalized)
|
||||||
|
elif isinstance(item, ResultItem):
|
||||||
|
item_dict = item.to_dict()
|
||||||
|
results_list.append(item_dict)
|
||||||
|
ctx.emit(item_dict)
|
||||||
|
else:
|
||||||
|
item_dict = {"title": str(item)}
|
||||||
|
results_list.append(item_dict)
|
||||||
|
ctx.emit(item_dict)
|
||||||
|
|
||||||
|
# Write results to worker stdout
|
||||||
|
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||||
|
else:
|
||||||
|
log("No results found", file=sys.stderr)
|
||||||
|
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
|
||||||
|
|
||||||
|
db.update_worker_status(worker_id, 'completed')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Search failed: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
|
db.update_worker_status(worker_id, 'error')
|
||||||
|
return 1
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Always close the database connection
|
||||||
|
try:
|
||||||
|
db.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
325
cmdlets/worker.py
Normal file
@@ -0,0 +1,325 @@
|
|||||||
|
"""Worker cmdlet: Display workers table in ResultTable format."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Sequence, List
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from . import register
|
||||||
|
from ._shared import Cmdlet, CmdletArg
|
||||||
|
import pipeline as ctx
|
||||||
|
from helper.logger import log
|
||||||
|
from config import get_local_storage_path
|
||||||
|
|
||||||
|
|
||||||
|
CMDLET = Cmdlet(
|
||||||
|
name=".worker",
|
||||||
|
summary="Display workers table in result table format.",
|
||||||
|
usage=".worker [status] [-limit N] [@N]",
|
||||||
|
args=[
|
||||||
|
CmdletArg("status", description="Filter by status: running, completed, error (default: all)"),
|
||||||
|
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
|
||||||
|
CmdletArg("@N", description="Select worker by index (1-based) and display full logs"),
|
||||||
|
],
|
||||||
|
details=[
|
||||||
|
"- Shows all background worker tasks and their output",
|
||||||
|
"- Can filter by status: running, completed, error",
|
||||||
|
"- Search result stdout is captured from each worker",
|
||||||
|
"- Use @N to select a specific worker by index and display its full logs",
|
||||||
|
"Examples:",
|
||||||
|
".worker # Show all workers",
|
||||||
|
".worker running # Show running workers only",
|
||||||
|
".worker completed -limit 50 # Show 50 most recent completed workers",
|
||||||
|
".worker @3 # Show full logs for the 3rd worker",
|
||||||
|
".worker running @2 # Show full logs for the 2nd running worker",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@register([".worker", "worker", "workers"])
|
||||||
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Display workers table or show detailed logs for a specific worker."""
|
||||||
|
args_list = [str(arg) for arg in (args or [])]
|
||||||
|
selection_indices = ctx.get_last_selection()
|
||||||
|
selection_requested = bool(selection_indices) and isinstance(result, list) and len(result) > 0
|
||||||
|
|
||||||
|
# Parse arguments for list view
|
||||||
|
status_filter: str | None = None
|
||||||
|
limit = 100
|
||||||
|
clear_requested = False
|
||||||
|
worker_id_arg: str | None = None
|
||||||
|
i = 0
|
||||||
|
while i < len(args_list):
|
||||||
|
arg = args_list[i]
|
||||||
|
low = arg.lower()
|
||||||
|
if low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||||
|
try:
|
||||||
|
limit = max(1, int(args_list[i + 1]))
|
||||||
|
except ValueError:
|
||||||
|
limit = 100
|
||||||
|
i += 2
|
||||||
|
elif low in {"-id", "--id"} and i + 1 < len(args_list):
|
||||||
|
worker_id_arg = args_list[i + 1]
|
||||||
|
i += 2
|
||||||
|
elif low in {"-clear", "--clear"}:
|
||||||
|
clear_requested = True
|
||||||
|
i += 1
|
||||||
|
elif low in {"running", "completed", "error", "cancelled"}:
|
||||||
|
status_filter = low
|
||||||
|
i += 1
|
||||||
|
elif not arg.startswith("-"):
|
||||||
|
status_filter = low
|
||||||
|
i += 1
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||||
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
library_root = get_local_storage_path(config or {})
|
||||||
|
if not library_root:
|
||||||
|
log("No library root configured", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
from helper.local_library import LocalLibraryDB
|
||||||
|
db: LocalLibraryDB | None = None
|
||||||
|
try:
|
||||||
|
db = LocalLibraryDB(library_root)
|
||||||
|
if clear_requested:
|
||||||
|
count = db.clear_finished_workers()
|
||||||
|
log(f"Cleared {count} finished workers.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if worker_id_arg:
|
||||||
|
worker = db.get_worker(worker_id_arg)
|
||||||
|
if worker:
|
||||||
|
events = []
|
||||||
|
try:
|
||||||
|
wid = worker.get("worker_id")
|
||||||
|
if wid and hasattr(db, "get_worker_events"):
|
||||||
|
events = db.get_worker_events(wid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
_emit_worker_detail(worker, events)
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
log(f"Worker not found: {worker_id_arg}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if selection_requested:
|
||||||
|
return _render_worker_selection(db, result)
|
||||||
|
return _render_worker_list(db, status_filter, limit)
|
||||||
|
finally:
|
||||||
|
if db:
|
||||||
|
db.close()
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Workers query failed: {exc}", file=sys.stderr)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
|
||||||
|
workers = db.get_all_workers(limit=limit)
|
||||||
|
if status_filter:
|
||||||
|
workers = [w for w in workers if str(w.get("status", "")).lower() == status_filter]
|
||||||
|
|
||||||
|
if not workers:
|
||||||
|
log("No workers found", file=sys.stderr)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
for worker in workers:
|
||||||
|
started = worker.get("started_at", "")
|
||||||
|
ended = worker.get("completed_at", worker.get("last_updated", ""))
|
||||||
|
|
||||||
|
date_str = _extract_date(started)
|
||||||
|
start_time = _format_event_timestamp(started)
|
||||||
|
end_time = _format_event_timestamp(ended)
|
||||||
|
|
||||||
|
item = {
|
||||||
|
"columns": [
|
||||||
|
("Status", worker.get("status", "")),
|
||||||
|
("Pipe", _summarize_pipe(worker.get("pipe"))),
|
||||||
|
("Date", date_str),
|
||||||
|
("Start Time", start_time),
|
||||||
|
("End Time", end_time),
|
||||||
|
],
|
||||||
|
"__worker_metadata": worker,
|
||||||
|
"_selection_args": ["-id", worker.get("worker_id")]
|
||||||
|
}
|
||||||
|
ctx.emit(item)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _render_worker_selection(db, selected_items: Any) -> int:
|
||||||
|
if not isinstance(selected_items, list):
|
||||||
|
log("Selection payload missing", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
emitted = False
|
||||||
|
for item in selected_items:
|
||||||
|
worker = _resolve_worker_record(db, item)
|
||||||
|
if not worker:
|
||||||
|
continue
|
||||||
|
events = []
|
||||||
|
try:
|
||||||
|
events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
|
||||||
|
except Exception:
|
||||||
|
events = []
|
||||||
|
_emit_worker_detail(worker, events)
|
||||||
|
emitted = True
|
||||||
|
if not emitted:
|
||||||
|
log("Selected rows no longer exist", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_worker_record(db, payload: Any) -> Dict[str, Any] | None:
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
return None
|
||||||
|
worker_data = payload.get("__worker_metadata")
|
||||||
|
worker_id = None
|
||||||
|
if isinstance(worker_data, dict):
|
||||||
|
worker_id = worker_data.get("worker_id")
|
||||||
|
else:
|
||||||
|
worker_id = payload.get("worker_id")
|
||||||
|
worker_data = None
|
||||||
|
if worker_id:
|
||||||
|
fresh = db.get_worker(worker_id)
|
||||||
|
if fresh:
|
||||||
|
return fresh
|
||||||
|
return worker_data if isinstance(worker_data, dict) else None
|
||||||
|
|
||||||
|
|
||||||
|
def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
|
||||||
|
# Parse stdout logs into rows
|
||||||
|
stdout_content = worker.get("stdout", "") or ""
|
||||||
|
|
||||||
|
# Try to parse lines if they follow the standard log format
|
||||||
|
# Format: YYYY-MM-DD HH:MM:SS - name - level - message
|
||||||
|
lines = stdout_content.splitlines()
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Default values
|
||||||
|
timestamp = ""
|
||||||
|
level = "INFO"
|
||||||
|
message = line
|
||||||
|
|
||||||
|
# Try to parse standard format
|
||||||
|
try:
|
||||||
|
parts = line.split(" - ", 3)
|
||||||
|
if len(parts) >= 4:
|
||||||
|
# Full format
|
||||||
|
ts_str, _, lvl, msg = parts
|
||||||
|
timestamp = _format_event_timestamp(ts_str)
|
||||||
|
level = lvl
|
||||||
|
message = msg
|
||||||
|
elif len(parts) == 3:
|
||||||
|
# Missing name or level
|
||||||
|
ts_str, lvl, msg = parts
|
||||||
|
timestamp = _format_event_timestamp(ts_str)
|
||||||
|
level = lvl
|
||||||
|
message = msg
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
item = {
|
||||||
|
"columns": [
|
||||||
|
("Time", timestamp),
|
||||||
|
("Level", level),
|
||||||
|
("Message", message)
|
||||||
|
]
|
||||||
|
}
|
||||||
|
ctx.emit(item)
|
||||||
|
|
||||||
|
# Also emit events if available and not redundant
|
||||||
|
# (For now, just focusing on stdout logs as requested)
|
||||||
|
|
||||||
|
|
||||||
|
def _summarize_pipe(pipe_value: Any, limit: int = 60) -> str:
|
||||||
|
text = str(pipe_value or "").strip()
|
||||||
|
if not text:
|
||||||
|
return "(none)"
|
||||||
|
return text if len(text) <= limit else text[: limit - 3] + "..."
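# For example, an empty pipe renders as "(none)", and a pipeline string longer
# than 60 characters is truncated to 57 characters plus "...".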
|
||||||
|
|
||||||
|
|
||||||
|
def _format_event_timestamp(raw_timestamp: Any) -> str:
|
||||||
|
dt = _parse_to_local(raw_timestamp)
|
||||||
|
if dt:
|
||||||
|
return dt.strftime("%H:%M:%S")
|
||||||
|
|
||||||
|
if not raw_timestamp:
|
||||||
|
return "--:--:--"
|
||||||
|
text = str(raw_timestamp)
|
||||||
|
if "T" in text:
|
||||||
|
time_part = text.split("T", 1)[1]
|
||||||
|
elif " " in text:
|
||||||
|
time_part = text.split(" ", 1)[1]
|
||||||
|
else:
|
||||||
|
time_part = text
|
||||||
|
return time_part[:8] if len(time_part) >= 8 else time_part
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_to_local(timestamp_str: Any) -> datetime | None:
|
||||||
|
if not timestamp_str:
|
||||||
|
return None
|
||||||
|
text = str(timestamp_str).strip()
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Check for T separator (Python isoformat - Local time)
|
||||||
|
if 'T' in text:
|
||||||
|
return datetime.fromisoformat(text)
|
||||||
|
|
||||||
|
# Check for space separator (SQLite CURRENT_TIMESTAMP - UTC)
|
||||||
|
# Format: YYYY-MM-DD HH:MM:SS
|
||||||
|
if ' ' in text:
|
||||||
|
# Assume UTC
|
||||||
|
dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt.astimezone() # Convert to local
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
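# Illustrative behaviour: "2024-01-05T12:30:00" (isoformat, no offset) is taken
# as local time, whereas "2024-01-05 12:30:00" (SQLite CURRENT_TIMESTAMP) is
# assumed to be UTC and converted to the local timezone.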
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_date(raw_timestamp: Any) -> str:
|
||||||
|
dt = _parse_to_local(raw_timestamp)
|
||||||
|
if dt:
|
||||||
|
return dt.strftime("%m-%d-%y")
|
||||||
|
|
||||||
|
# Fallback
|
||||||
|
if not raw_timestamp:
|
||||||
|
return ""
|
||||||
|
text = str(raw_timestamp)
|
||||||
|
# Extract YYYY-MM-DD part
|
||||||
|
date_part = ""
|
||||||
|
if "T" in text:
|
||||||
|
date_part = text.split("T", 1)[0]
|
||||||
|
elif " " in text:
|
||||||
|
date_part = text.split(" ", 1)[0]
|
||||||
|
else:
|
||||||
|
date_part = text
|
||||||
|
|
||||||
|
# Convert YYYY-MM-DD to MM-DD-YY
|
||||||
|
try:
|
||||||
|
parts = date_part.split("-")
|
||||||
|
if len(parts) == 3:
|
||||||
|
year, month, day = parts
|
||||||
|
return f"{month}-{day}-{year[2:]}"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return date_part
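# Example: "2024-01-05 12:30:00" normally renders as "01-05-24"; if parsing
# fails, the raw "YYYY-MM-DD" prefix is reshuffled to "MM-DD-YY" instead.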
|
||||||
360
config.py
Normal file
@@ -0,0 +1,360 @@
|
|||||||
|
|
||||||
|
"""Unified configuration helpers for downlow."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
DEFAULT_CONFIG_FILENAME = "config.json"
|
||||||
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
|
_CONFIG_CACHE: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cache_key(config_dir: Optional[Path], filename: str, actual_path: Optional[Path]) -> str:
|
||||||
|
if actual_path:
|
||||||
|
return str(actual_path.resolve())
|
||||||
|
base_dir = (config_dir or SCRIPT_DIR)
|
||||||
|
return str((base_dir / filename).resolve())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> Optional[Dict[str, Any]]:
|
||||||
|
"""Get a specific Hydrus instance config by name.
|
||||||
|
|
||||||
|
Supports both formats:
|
||||||
|
- New: config["storage"]["hydrus"][instance_name] = {"key": "...", "url": "..."}
|
||||||
|
- Old: config["HydrusNetwork"][instance_name] = {"key": "...", "url": "..."}
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
instance_name: Name of the Hydrus instance (default: "home")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with "key" and "url" keys, or None if not found
|
||||||
|
"""
|
||||||
|
# Try new format first
|
||||||
|
storage = config.get("storage", {})
|
||||||
|
if isinstance(storage, dict):
|
||||||
|
hydrus_config = storage.get("hydrus", {})
|
||||||
|
if isinstance(hydrus_config, dict):
|
||||||
|
instance = hydrus_config.get(instance_name)
|
||||||
|
if isinstance(instance, dict):
|
||||||
|
return instance
|
||||||
|
|
||||||
|
# Fall back to old format
|
||||||
|
hydrus_network = config.get("HydrusNetwork")
|
||||||
|
if not isinstance(hydrus_network, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
instance = hydrus_network.get(instance_name)
|
||||||
|
if isinstance(instance, dict):
|
||||||
|
return instance
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
|
||||||
|
"""Get Hydrus access key for an instance.
|
||||||
|
|
||||||
|
Supports both old flat format and new nested format:
|
||||||
|
- Old: config["HydrusNetwork_Access_Key"]
|
||||||
|
- New: config["HydrusNetwork"][instance_name]["key"]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
instance_name: Name of the Hydrus instance (default: "home")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Access key string, or None if not found
|
||||||
|
"""
|
||||||
|
instance = get_hydrus_instance(config, instance_name)
|
||||||
|
key = instance.get("key") if instance else config.get("HydrusNetwork_Access_Key")
|
||||||
|
return str(key).strip() if key else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_hydrus_url(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
|
||||||
|
"""Get Hydrus URL for an instance.
|
||||||
|
|
||||||
|
Supports both old flat format and new nested format:
|
||||||
|
- Old: config["HydrusNetwork_URL"] or constructed from IP/Port/HTTPS
|
||||||
|
- New: config["HydrusNetwork"][instance_name]["url"]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
instance_name: Name of the Hydrus instance (default: "home")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL string, or None if not found
|
||||||
|
"""
|
||||||
|
instance = get_hydrus_instance(config, instance_name)
|
||||||
|
url = instance.get("url") if instance else config.get("HydrusNetwork_URL")
|
||||||
|
if url: # Check if not None and not empty
|
||||||
|
return str(url).strip()
|
||||||
|
# Build from IP/Port/HTTPS if not found
|
||||||
|
host = str(config.get("HydrusNetwork_IP") or "localhost").strip() or "localhost"
|
||||||
|
port = str(config.get("HydrusNetwork_Port") or "45869").strip()
|
||||||
|
scheme = "https" if str(config.get("HydrusNetwork_Use_HTTPS") or "").strip().lower() in {"1", "true", "yes", "on"} else "http"
|
||||||
|
authority = host if not (":" in host and not host.startswith("[")) else f"[{host}]"
|
||||||
|
return f"{scheme}://{authority}:{port}"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
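As a rough illustration of the two layouts these Hydrus accessors accept (the keys and values below are placeholders, not project defaults):

new_style = {"storage": {"hydrus": {"home": {"key": "abc123", "url": "http://127.0.0.1:45869"}}}}
old_style = {"HydrusNetwork_IP": "localhost", "HydrusNetwork_Port": "45869"}

get_hydrus_access_key(new_style)   # -> "abc123"
get_hydrus_url(new_style)          # -> "http://127.0.0.1:45869"
get_hydrus_url(old_style)          # -> "http://localhost:45869", built from IP/Port/HTTPS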
def resolve_output_dir(config: Dict[str, Any]) -> Path:
|
||||||
|
"""Resolve output directory from config with single source of truth.
|
||||||
|
|
||||||
|
Priority:
|
||||||
|
1. config["temp"] - explicitly set temp/output directory
|
||||||
|
2. config["outfile"] - fallback to outfile setting
|
||||||
|
3. Home/Videos - safe user directory fallback
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to output directory
|
||||||
|
"""
|
||||||
|
# First try explicit temp setting from config
|
||||||
|
temp_value = config.get("temp")
|
||||||
|
if temp_value:
|
||||||
|
try:
|
||||||
|
path = Path(str(temp_value)).expanduser()
|
||||||
|
# Verify we can access it (not a system directory with permission issues)
|
||||||
|
if path.exists() or path.parent.exists():
|
||||||
|
return path
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Then try outfile setting
|
||||||
|
outfile_value = config.get("outfile")
|
||||||
|
if outfile_value:
|
||||||
|
try:
|
||||||
|
return Path(str(outfile_value)).expanduser()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback to user's Videos directory
|
||||||
|
return Path.home() / "Videos"
|
||||||
|
|
||||||
|
|
||||||
|
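A quick sketch of the fallback order documented above, with made-up paths:

resolve_output_dir({"temp": "~/tmp/downlow"})         # used if the path or its parent exists
resolve_output_dir({"outfile": "~/Downloads/media"})  # otherwise the outfile setting
resolve_output_dir({})                                # finally Path.home() / "Videos"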
def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]:
|
||||||
|
"""Get local storage path from config.
|
||||||
|
|
||||||
|
Supports both formats:
|
||||||
|
- New: config["storage"]["local"]["path"]
|
||||||
|
- Old: config["Local"]["path"]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path object if found, None otherwise
|
||||||
|
"""
|
||||||
|
# Try new format first
|
||||||
|
storage = config.get("storage", {})
|
||||||
|
if isinstance(storage, dict):
|
||||||
|
local_config = storage.get("local", {})
|
||||||
|
if isinstance(local_config, dict):
|
||||||
|
path_str = local_config.get("path")
|
||||||
|
if path_str:
|
||||||
|
return Path(str(path_str)).expanduser()
|
||||||
|
|
||||||
|
# Fall back to old format
|
||||||
|
local_config = config.get("Local", {})
|
||||||
|
if isinstance(local_config, dict):
|
||||||
|
path_str = local_config.get("path")
|
||||||
|
if path_str:
|
||||||
|
return Path(str(path_str)).expanduser()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_debrid_api_key(config: Dict[str, Any], service: str = "All-debrid") -> Optional[str]:
|
||||||
|
"""Get Debrid API key from config.
|
||||||
|
|
||||||
|
Supports both formats:
|
||||||
|
- New: config["storage"]["debrid"]["All-debrid"]
|
||||||
|
- Old: config["Debrid"]["All-debrid"]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
service: Service name (default: "All-debrid")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
API key string if found, None otherwise
|
||||||
|
"""
|
||||||
|
# Try new format first
|
||||||
|
storage = config.get("storage", {})
|
||||||
|
if isinstance(storage, dict):
|
||||||
|
debrid_config = storage.get("debrid", {})
|
||||||
|
if isinstance(debrid_config, dict):
|
||||||
|
api_key = debrid_config.get(service)
|
||||||
|
if api_key: # Check if not None and not empty
|
||||||
|
                return str(api_key).strip()
|
||||||
|
|
||||||
|
# Fall back to old format
|
||||||
|
debrid_config = config.get("Debrid", {})
|
||||||
|
if isinstance(debrid_config, dict):
|
||||||
|
api_key = debrid_config.get(service)
|
||||||
|
if api_key: # Check if not None and not empty
|
||||||
|
            return str(api_key).strip()
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[Dict[str, str]]:
|
||||||
|
"""Get provider credentials (email/password) from config.
|
||||||
|
|
||||||
|
Supports both formats:
|
||||||
|
- New: config["provider"][provider] = {"email": "...", "password": "..."}
|
||||||
|
- Old: config[provider.capitalize()] = {"email": "...", "password": "..."}
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict
|
||||||
|
provider: Provider name (e.g., "openlibrary", "soulseek")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with credentials if found, None otherwise
|
||||||
|
"""
|
||||||
|
# Try new format first
|
||||||
|
provider_config = config.get("provider", {})
|
||||||
|
if isinstance(provider_config, dict):
|
||||||
|
creds = provider_config.get(provider.lower(), {})
|
||||||
|
if isinstance(creds, dict) and creds:
|
||||||
|
return creds
|
||||||
|
|
||||||
|
# Fall back to old format (capitalized key)
|
||||||
|
old_key_map = {
|
||||||
|
"openlibrary": "OpenLibrary",
|
||||||
|
"archive": "Archive",
|
||||||
|
"soulseek": "Soulseek",
|
||||||
|
}
|
||||||
|
old_key = old_key_map.get(provider.lower())
|
||||||
|
if old_key:
|
||||||
|
creds = config.get(old_key, {})
|
||||||
|
if isinstance(creds, dict) and creds:
|
||||||
|
return creds
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
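The debrid and provider lookups follow the same new/old pattern; a hedged sketch with placeholder values:

cfg = {
    "storage": {"debrid": {"All-debrid": "ALLDEBRID_KEY"}},
    "provider": {"openlibrary": {"email": "user@example.com", "password": "hunter2"}},
}
get_debrid_api_key(cfg)                        # -> "ALLDEBRID_KEY"
get_provider_credentials(cfg, "OpenLibrary")   # lookup is lower-cased -> {"email": ..., "password": ...}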
def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = None) -> Optional[Path]:
|
||||||
|
value = config.get("cookies") or config.get("Cookies_Path")
|
||||||
|
if value:
|
||||||
|
candidate = Path(str(value)).expanduser()
|
||||||
|
if candidate.is_file():
|
||||||
|
return candidate
|
||||||
|
base_dir = script_dir or SCRIPT_DIR
|
||||||
|
default_path = base_dir / "cookies.txt"
|
||||||
|
if default_path.is_file():
|
||||||
|
return default_path
|
||||||
|
return None
|
||||||
|
|
||||||
|
def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
|
||||||
|
value = config.get("download_debug_log")
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
path = Path(str(value)).expanduser()
|
||||||
|
if not path.is_absolute():
|
||||||
|
path = Path.cwd() / path
|
||||||
|
return path
|
||||||
|
|
||||||
|
def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
|
||||||
|
base_dir = config_dir or SCRIPT_DIR
|
||||||
|
config_path = base_dir / filename
|
||||||
|
cache_key = _make_cache_key(config_dir, filename, config_path)
|
||||||
|
if cache_key in _CONFIG_CACHE:
|
||||||
|
return _CONFIG_CACHE[cache_key]
|
||||||
|
|
||||||
|
try:
|
||||||
|
raw = config_path.read_text(encoding="utf-8")
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Try alternate filename if default not found
|
||||||
|
if filename == DEFAULT_CONFIG_FILENAME:
|
||||||
|
alt_path = base_dir / "downlow.json"
|
||||||
|
try:
|
||||||
|
raw = alt_path.read_text(encoding="utf-8")
|
||||||
|
config_path = alt_path
|
||||||
|
cache_key = _make_cache_key(config_dir, filename, alt_path)
|
||||||
|
except FileNotFoundError:
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
except OSError as exc:
|
||||||
|
log(f"Failed to read {alt_path}: {exc}")
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
else:
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
except OSError as exc:
|
||||||
|
log(f"Failed to read {config_path}: {exc}")
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
raw = raw.strip()
|
||||||
|
if not raw:
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except json.JSONDecodeError as exc:
|
||||||
|
log(f"Invalid JSON in {config_path}: {exc}")
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
log(f"Expected object in {config_path}, got {type(data).__name__}")
|
||||||
|
_CONFIG_CACHE[cache_key] = {}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
_CONFIG_CACHE[cache_key] = data
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def reload_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
|
||||||
|
cache_key = _make_cache_key(config_dir, filename, None)
|
||||||
|
_CONFIG_CACHE.pop(cache_key, None)
|
||||||
|
return load_config(config_dir=config_dir, filename=filename)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_config_cache() -> None:
|
||||||
|
_CONFIG_CACHE.clear()
|
||||||
|
|
||||||
|
def save_config(
|
||||||
|
config: Dict[str, Any],
|
||||||
|
config_dir: Optional[Path] = None,
|
||||||
|
filename: str = DEFAULT_CONFIG_FILENAME,
|
||||||
|
) -> None:
|
||||||
|
base_dir = config_dir or SCRIPT_DIR
|
||||||
|
config_path = base_dir / filename
|
||||||
|
|
||||||
|
# Load existing config to preserve keys that aren't being changed
|
||||||
|
try:
|
||||||
|
existing_raw = config_path.read_text(encoding="utf-8")
|
||||||
|
existing_data = json.loads(existing_raw.strip())
|
||||||
|
if isinstance(existing_data, dict):
|
||||||
|
# Merge: existing config as base, then overlay with new config
|
||||||
|
merged = existing_data.copy()
|
||||||
|
merged.update(config)
|
||||||
|
config = merged
|
||||||
|
except (FileNotFoundError, OSError, json.JSONDecodeError):
|
||||||
|
# File doesn't exist or is invalid, use provided config as-is
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
config_path.write_text(
|
||||||
|
json.dumps(config, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
except OSError as exc:
|
||||||
|
raise RuntimeError(f"Failed to write config to {config_path}: {exc}") from exc
|
||||||
|
|
||||||
|
cache_key = _make_cache_key(config_dir, filename, config_path)
|
||||||
|
_CONFIG_CACHE[cache_key] = config
|
||||||
|
|
||||||
|
def load() -> Dict[str, Any]:
|
||||||
|
"""Return the parsed downlow configuration."""
|
||||||
|
return load_config()
|
||||||
|
|
||||||
|
def save(config: Dict[str, Any]) -> None:
|
||||||
|
"""Persist *config* back to disk."""
|
||||||
|
save_config(config)
|
||||||
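A minimal sketch of the intended call pattern for this module, assuming config.json sits next to config.py:

cfg = load()                     # parsed once, then served from _CONFIG_CACHE
cfg["temp"] = "~/tmp/downlow"
save(cfg)                        # merges with what is on disk, rewrites the file, refreshes the cache
cfg = reload_config()            # drops the cache entry and re-reads config.json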
92
helper/__init__.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
"""Helper modules for the downlow mpv integration."""
|
||||||
|
from . import hydrus as _hydrus
|
||||||
|
from . import download as _download
|
||||||
|
from . import tasks as _tasks
|
||||||
|
from . import utils as _utils
|
||||||
|
|
||||||
|
try: # Optional dependency on Playwright
|
||||||
|
from . import webshot as _webshot
|
||||||
|
except Exception as exc: # pragma: no cover - surfaced when Playwright is missing
|
||||||
|
_webshot = None # type: ignore
|
||||||
|
ScreenshotError = None # type: ignore[assignment]
|
||||||
|
ScreenshotOptions = None # type: ignore[assignment]
|
||||||
|
ScreenshotResult = None # type: ignore[assignment]
|
||||||
|
capture_screenshot = None # type: ignore[assignment]
|
||||||
|
ScreenshotImportError = exc # type: ignore[assignment]
|
||||||
|
else:
|
||||||
|
ScreenshotError = _webshot.ScreenshotError
|
||||||
|
ScreenshotOptions = _webshot.ScreenshotOptions
|
||||||
|
ScreenshotResult = _webshot.ScreenshotResult
|
||||||
|
capture_screenshot = _webshot.capture_screenshot
|
||||||
|
ScreenshotImportError = None
|
||||||
|
# CBOR utilities
|
||||||
|
decode_cbor = _utils.decode_cbor
|
||||||
|
jsonify = _utils.jsonify
|
||||||
|
# General utilities
|
||||||
|
CHUNK_SIZE = _utils.CHUNK_SIZE
|
||||||
|
ensure_directory = _utils.ensure_directory
|
||||||
|
unique_path = _utils.unique_path
|
||||||
|
download_hydrus_file = _hydrus.download_hydrus_file
|
||||||
|
sanitize_metadata_value = _utils.sanitize_metadata_value
|
||||||
|
unique_preserve_order = _utils.unique_preserve_order
|
||||||
|
sha256_file = _utils.sha256_file
|
||||||
|
create_metadata_sidecar = _utils.create_metadata_sidecar
|
||||||
|
create_tags_sidecar = _utils.create_tags_sidecar
|
||||||
|
# Format utilities
|
||||||
|
format_bytes = _utils.format_bytes
|
||||||
|
format_duration = _utils.format_duration
|
||||||
|
format_timestamp = _utils.format_timestamp
|
||||||
|
format_metadata_value = _utils.format_metadata_value
|
||||||
|
# Link utilities
|
||||||
|
extract_link = _utils.extract_link
|
||||||
|
extract_link_from_args = _utils.extract_link_from_args
|
||||||
|
extract_link_from_result = _utils.extract_link_from_result
|
||||||
|
get_api_key = _utils.get_api_key
|
||||||
|
add_direct_link_to_result = _utils.add_direct_link_to_result
|
||||||
|
# URL policy utilities
|
||||||
|
resolve_url_policy = _utils.resolve_url_policy
|
||||||
|
UrlPolicy = _utils.UrlPolicy
|
||||||
|
# Download utilities
|
||||||
|
DownloadOptions = _download.DownloadOptions
|
||||||
|
DownloadError = _download.DownloadError
|
||||||
|
DownloadMediaResult = _download.DownloadMediaResult
|
||||||
|
download_media = _download.download_media
|
||||||
|
is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp
|
||||||
|
probe_url = _download.probe_url
|
||||||
|
# Hydrus utilities
|
||||||
|
hydrus_request = _hydrus.hydrus_request
|
||||||
|
hydrus_export = _hydrus.hydrus_export
|
||||||
|
HydrusClient = _hydrus.HydrusClient
|
||||||
|
HydrusRequestError = _hydrus.HydrusRequestError
|
||||||
|
connect_ipc = _tasks.connect_ipc
|
||||||
|
ipc_sender = _tasks.ipc_sender
|
||||||
|
__all__ = [
|
||||||
|
'decode_cbor',
|
||||||
|
'jsonify',
|
||||||
|
'CHUNK_SIZE',
|
||||||
|
'ensure_directory',
|
||||||
|
'unique_path',
|
||||||
|
'download_hydrus_file',
|
||||||
|
'sanitize_metadata_value',
|
||||||
|
'unique_preserve_order',
|
||||||
|
'sha256_file',
|
||||||
|
'resolve_url_policy',
|
||||||
|
'UrlPolicy',
|
||||||
|
'ScreenshotError',
|
||||||
|
'ScreenshotOptions',
|
||||||
|
'ScreenshotResult',
|
||||||
|
'capture_screenshot',
|
||||||
|
'ScreenshotImportError',
|
||||||
|
'DownloadOptions',
|
||||||
|
'DownloadError',
|
||||||
|
'DownloadMediaResult',
|
||||||
|
'download_media',
|
||||||
|
'is_url_supported_by_ytdlp',
|
||||||
|
'probe_url',
|
||||||
|
'HydrusClient',
|
||||||
|
'HydrusRequestError',
|
||||||
|
'hydrus_request',
|
||||||
|
'hydrus_export',
|
||||||
|
'connect_ipc',
|
||||||
|
'ipc_sender',
|
||||||
|
]
|
||||||
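A sketch of how a caller can deal with the optional Playwright dependency this package exposes; the names come from the module above:

import helper

if helper.ScreenshotImportError is not None:
    print(f"webshot disabled: {helper.ScreenshotImportError}")
else:
    capture = helper.capture_screenshot   # only usable on this branch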
130
helper/adjective.json
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
{
|
||||||
|
"Occult": [
|
||||||
|
"esoterica",
|
||||||
|
"ritual",
|
||||||
|
"alchemy",
|
||||||
|
"magic",
|
||||||
|
"hermetic",
|
||||||
|
"divination",
|
||||||
|
"grimoires",
|
||||||
|
"symbolism",
|
||||||
|
"ceremony"
|
||||||
|
],
|
||||||
|
"Philosophy": [
|
||||||
|
"ethics",
|
||||||
|
"metaphysics",
|
||||||
|
"epistemology",
|
||||||
|
"logic",
|
||||||
|
"existentialism",
|
||||||
|
"stoicism",
|
||||||
|
"phenomenology",
|
||||||
|
"dialectic",
|
||||||
|
"aesthetics"
|
||||||
|
],
|
||||||
|
"Mystery": [
|
||||||
|
"investigation",
|
||||||
|
"crime",
|
||||||
|
"detective",
|
||||||
|
"noir",
|
||||||
|
"thriller",
|
||||||
|
"suspense",
|
||||||
|
"conspiracy",
|
||||||
|
"whodunit",
|
||||||
|
"clues"
|
||||||
|
],
|
||||||
|
"Religion": [
|
||||||
|
"scripture",
|
||||||
|
"theology",
|
||||||
|
"worship",
|
||||||
|
"ritual",
|
||||||
|
"doctrine",
|
||||||
|
"faith",
|
||||||
|
"tradition",
|
||||||
|
"liturgy",
|
||||||
|
"sacred"
|
||||||
|
],
|
||||||
|
"Mythology": [
|
||||||
|
"gods",
|
||||||
|
"creation",
|
||||||
|
"heroes",
|
||||||
|
"legends",
|
||||||
|
"folklore",
|
||||||
|
"pantheon",
|
||||||
|
"epic",
|
||||||
|
"mythic",
|
||||||
|
"archetype"
|
||||||
|
],
|
||||||
|
"Science": [
|
||||||
|
"research",
|
||||||
|
"experiment",
|
||||||
|
"theory",
|
||||||
|
"biology",
|
||||||
|
"physics",
|
||||||
|
"chemistry",
|
||||||
|
"data",
|
||||||
|
"method",
|
||||||
|
"innovation"
|
||||||
|
],
|
||||||
|
"Art": [
|
||||||
|
"visual",
|
||||||
|
"painting",
|
||||||
|
"sculpture",
|
||||||
|
"modernism",
|
||||||
|
"technique",
|
||||||
|
"studio",
|
||||||
|
"curation",
|
||||||
|
"expression",
|
||||||
|
"composition"
|
||||||
|
],
|
||||||
|
"Literature": [
|
||||||
|
"fiction",
|
||||||
|
"poetry",
|
||||||
|
"novel",
|
||||||
|
"criticism",
|
||||||
|
"narrative",
|
||||||
|
"prose",
|
||||||
|
"drama",
|
||||||
|
"canonical",
|
||||||
|
"translation"
|
||||||
|
],
|
||||||
|
"History": [
|
||||||
|
"archaeology",
|
||||||
|
"chronicle",
|
||||||
|
"period",
|
||||||
|
"empire",
|
||||||
|
"revolution",
|
||||||
|
"archive",
|
||||||
|
"heritage",
|
||||||
|
"historiography",
|
||||||
|
"timeline"
|
||||||
|
],
|
||||||
|
"Psychology": [
|
||||||
|
"cognition",
|
||||||
|
"behavior",
|
||||||
|
"therapy",
|
||||||
|
"development",
|
||||||
|
"neuroscience",
|
||||||
|
"personality",
|
||||||
|
"perception",
|
||||||
|
"emotion",
|
||||||
|
"motivation"
|
||||||
|
],
|
||||||
|
"gnostic": [
|
||||||
|
"religion",
|
||||||
|
"scripture",
|
||||||
|
"gnostic",
|
||||||
|
"gospel",
|
||||||
|
"wisdom",
|
||||||
|
"spirituality",
|
||||||
|
"ancient",
|
||||||
|
"philosophy",
|
||||||
|
"esoteric",
|
||||||
|
"mysticism",
|
||||||
|
"mythology",
|
||||||
|
"theology",
|
||||||
|
"sacred",
|
||||||
|
"divine",
|
||||||
|
"apocrapha",
|
||||||
|
"gnosticism"
|
||||||
|
]
|
||||||
|
}
|
||||||
829
helper/alldebrid.py
Normal file
@@ -0,0 +1,829 @@
|
|||||||
|
"""AllDebrid API integration for converting free links to direct downloads.
|
||||||
|
|
||||||
|
AllDebrid is a debrid service that unlocks free file hosters and provides direct download links.
|
||||||
|
API docs: https://docs.alldebrid.com/#general-informations
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log, debug
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Optional, Set, List, Sequence
|
||||||
|
from urllib.parse import urlencode, urlparse
|
||||||
|
from .http_client import HTTPClient
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class AllDebridError(Exception):
|
||||||
|
"""Raised when AllDebrid API request fails."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Cache for supported hosters (domain -> host info)
|
||||||
|
_SUPPORTED_HOSTERS_CACHE: Optional[Dict[str, Dict[str, Any]]] = None
|
||||||
|
_CACHE_TIMESTAMP: float = 0
|
||||||
|
_CACHE_DURATION: float = 3600 # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
class AllDebridClient:
|
||||||
|
"""Client for AllDebrid API."""
|
||||||
|
|
||||||
|
# Try both v4 and v3 APIs
|
||||||
|
BASE_URLS = [
|
||||||
|
"https://api.alldebrid.com/v4",
|
||||||
|
"https://api.alldebrid.com/v3",
|
||||||
|
]
|
||||||
|
|
||||||
|
def __init__(self, api_key: str):
|
||||||
|
"""Initialize AllDebrid client with API key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
api_key: AllDebrid API key from config
|
||||||
|
"""
|
||||||
|
self.api_key = api_key.strip()
|
||||||
|
if not self.api_key:
|
||||||
|
raise AllDebridError("AllDebrid API key is empty")
|
||||||
|
self.base_url = self.BASE_URLS[0] # Start with v4
|
||||||
|
|
||||||
|
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||||
|
"""Make a request to AllDebrid API.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
endpoint: API endpoint (e.g., "user/profile", "link/unlock")
|
||||||
|
params: Query parameters
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Parsed JSON response
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails or API returns error
|
||||||
|
"""
|
||||||
|
if params is None:
|
||||||
|
params = {}
|
||||||
|
|
||||||
|
# Add API key to params
|
||||||
|
params['apikey'] = self.api_key
|
||||||
|
|
||||||
|
url = f"{self.base_url}/{endpoint}"
|
||||||
|
query_string = urlencode(params)
|
||||||
|
full_url = f"{url}?{query_string}"
|
||||||
|
|
||||||
|
logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Pass timeout to HTTPClient init, not to get()
|
||||||
|
with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client:
|
||||||
|
try:
|
||||||
|
response = client.get(full_url)
|
||||||
|
response.raise_for_status()
|
||||||
|
except Exception as req_err:
|
||||||
|
# Log detailed error info
|
||||||
|
logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True)
|
||||||
|
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
|
||||||
|
try:
|
||||||
|
error_body = req_err.response.content.decode('utf-8') # type: ignore
|
||||||
|
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
|
||||||
|
                        except Exception:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
data = json.loads(response.content.decode('utf-8'))
|
||||||
|
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
|
||||||
|
|
||||||
|
# Check for API errors
|
||||||
|
if data.get('status') == 'error':
|
||||||
|
error_msg = data.get('error', {}).get('message', 'Unknown error')
|
||||||
|
logger.error(f"[AllDebrid] API error: {error_msg}")
|
||||||
|
raise AllDebridError(f"AllDebrid API error: {error_msg}")
|
||||||
|
|
||||||
|
return data
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
error_msg = f"AllDebrid request failed: {exc}"
|
||||||
|
logger.error(f"[AllDebrid] {error_msg}", exc_info=True)
|
||||||
|
raise AllDebridError(error_msg)
|
||||||
|
|
||||||
|
def unlock_link(self, link: str) -> Optional[str]:
|
||||||
|
"""Unlock a restricted link and get direct download URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
link: Restricted link to unlock
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
            Direct download URL (or the original link if the response carries no direct link), or None on a non-success response
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If unlock fails
|
||||||
|
"""
|
||||||
|
if not link.startswith(('http://', 'https://')):
|
||||||
|
raise AllDebridError(f"Invalid URL: {link}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self._request('link/unlock', {'link': link})
|
||||||
|
|
||||||
|
# Check if unlock was successful
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
|
||||||
|
# AllDebrid returns the download info in 'link' field
|
||||||
|
if 'link' in data:
|
||||||
|
return data['link']
|
||||||
|
|
||||||
|
# Alternative: check for 'file' field
|
||||||
|
if 'file' in data:
|
||||||
|
return data['file']
|
||||||
|
|
||||||
|
# If no direct link, return the input link
|
||||||
|
return link
|
||||||
|
|
||||||
|
return None
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to unlock link: {exc}")
|
||||||
|
|
||||||
|
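A minimal usage sketch for the client; the API key and link below are placeholders:

client = AllDebridClient("MY_ALLDEBRID_KEY")
try:
    direct = client.unlock_link("https://1fichier.com/?example")
    print(direct)   # direct download URL, or the original link if there was nothing to unlock
except AllDebridError as exc:
    print(f"unlock failed: {exc}")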
def check_host(self, hostname: str) -> Dict[str, Any]:
|
||||||
|
"""Check if a host is supported by AllDebrid.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hostname: Hostname to check (e.g., "uploadhaven.com")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Host information dict with support status
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = self._request('host', {'name': hostname})
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
return response.get('data', {})
|
||||||
|
|
||||||
|
return {}
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to check host: {exc}")
|
||||||
|
|
||||||
|
def get_user_info(self) -> Dict[str, Any]:
|
||||||
|
"""Get current user account information.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
User information dict
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = self._request('user/profile')
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
return response.get('data', {})
|
||||||
|
|
||||||
|
return {}
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to get user info: {exc}")
|
||||||
|
|
||||||
|
def get_supported_hosters(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
"""Get list of all supported hosters from AllDebrid API.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict mapping domain to host info (status, name, etc)
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = self._request('hosts/domains')
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
# The API returns hosts keyed by domain
|
||||||
|
return data if isinstance(data, dict) else {}
|
||||||
|
|
||||||
|
return {}
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to get supported hosters: {exc}")
|
||||||
|
|
||||||
|
def magnet_add(self, magnet_uri: str) -> Dict[str, Any]:
|
||||||
|
"""Submit a magnet link or torrent hash to AllDebrid for processing.
|
||||||
|
|
||||||
|
AllDebrid will download the torrent content and store it in the account.
|
||||||
|
Processing time varies based on torrent size and availability.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_uri: Magnet URI (magnet:?xt=urn:btih:...) or torrent hash
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with magnet info:
|
||||||
|
- id: Magnet ID (int) - needed for status checks
|
||||||
|
- name: Torrent name
|
||||||
|
- hash: Torrent hash
|
||||||
|
- size: Total file size (bytes)
|
||||||
|
- ready: Boolean - True if already available
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If submit fails (requires premium, invalid magnet, etc)
|
||||||
|
"""
|
||||||
|
if not magnet_uri:
|
||||||
|
raise AllDebridError("Magnet URI is empty")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# API endpoint: POST /v4/magnet/upload
|
||||||
|
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
|
||||||
|
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
magnets = data.get('magnets', [])
|
||||||
|
|
||||||
|
if magnets and len(magnets) > 0:
|
||||||
|
magnet_info = magnets[0]
|
||||||
|
|
||||||
|
# Check for errors in the magnet response
|
||||||
|
if 'error' in magnet_info:
|
||||||
|
error = magnet_info['error']
|
||||||
|
error_msg = error.get('message', 'Unknown error')
|
||||||
|
raise AllDebridError(f"Magnet error: {error_msg}")
|
||||||
|
|
||||||
|
return magnet_info
|
||||||
|
|
||||||
|
raise AllDebridError("No magnet data in response")
|
||||||
|
|
||||||
|
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to submit magnet: {exc}")
|
||||||
|
|
||||||
|
def magnet_status(self, magnet_id: int, include_files: bool = False) -> Dict[str, Any]:
|
||||||
|
"""Get status of a magnet currently being processed or stored.
|
||||||
|
|
||||||
|
Status codes:
|
||||||
|
0-3: Processing (in queue, downloading, compressing, uploading)
|
||||||
|
4: Ready (files available for download)
|
||||||
|
5-15: Error (upload failed, not downloaded in 20min, too big, etc)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_id: Magnet ID from magnet_add()
|
||||||
|
            include_files: If True, includes file list in response (currently unused; the v4.1 status response is returned as-is)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with status info:
|
||||||
|
- id: Magnet ID
|
||||||
|
- filename: Torrent name
|
||||||
|
- size: Total size (bytes)
|
||||||
|
- status: Human-readable status
|
||||||
|
- statusCode: Numeric code (0-15)
|
||||||
|
- downloaded: Bytes downloaded so far
|
||||||
|
- uploaded: Bytes uploaded so far
|
||||||
|
- seeders: Number of seeders
|
||||||
|
- downloadSpeed: Current speed (bytes/sec)
|
||||||
|
- uploadSpeed: Current speed (bytes/sec)
|
||||||
|
- files: (optional) Array of file objects when include_files=True
|
||||||
|
Each file: {n: name, s: size, l: download_link}
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If status check fails
|
||||||
|
"""
|
||||||
|
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||||
|
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use v4.1 endpoint for better response format
|
||||||
|
# Temporarily override base_url for this request
|
||||||
|
old_base = self.base_url
|
||||||
|
self.base_url = "https://api.alldebrid.com/v4.1"
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self._request('magnet/status', {'id': str(magnet_id)})
|
||||||
|
finally:
|
||||||
|
self.base_url = old_base
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
magnets = data.get('magnets', {})
|
||||||
|
|
||||||
|
# Handle both list and dict responses
|
||||||
|
if isinstance(magnets, list) and len(magnets) > 0:
|
||||||
|
return magnets[0]
|
||||||
|
elif isinstance(magnets, dict) and magnets:
|
||||||
|
return magnets
|
||||||
|
|
||||||
|
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||||
|
|
||||||
|
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to get magnet status: {exc}")
|
||||||
|
|
||||||
|
    def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
|
||||||
|
"""Get live status of a magnet using delta sync mode.
|
||||||
|
|
||||||
|
The live mode endpoint provides real-time progress by only sending
|
||||||
|
deltas (changed fields) instead of full status on each call. This
|
||||||
|
reduces bandwidth and server load compared to regular polling.
|
||||||
|
|
||||||
|
Note: The "live" designation refers to the delta-sync mode where you
|
||||||
|
maintain state locally and apply diffs from the API, not a streaming
|
||||||
|
endpoint. Regular magnet_status() polling is simpler for single magnets.
|
||||||
|
|
||||||
|
Docs: https://docs.alldebrid.com/#get-status-live-mode
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_id: Magnet ID from magnet_add()
|
||||||
|
session: Session ID (use same ID across multiple calls). If None, will query current status
|
||||||
|
counter: Counter value from previous response (starts at 0)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with magnet status. May contain only changed fields if counter > 0.
|
||||||
|
For single-magnet tracking, use magnet_status() instead.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails
|
||||||
|
"""
|
||||||
|
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||||
|
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# For single magnet queries, just use regular endpoint with ID
|
||||||
|
# The "live mode" with session/counter is for multi-magnet dashboards
|
||||||
|
# where bandwidth savings from diffs matter
|
||||||
|
response = self._request('magnet/status', {'id': magnet_id})
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
magnets = data.get('magnets', [])
|
||||||
|
|
||||||
|
# Handle list response
|
||||||
|
if isinstance(magnets, list) and len(magnets) > 0:
|
||||||
|
return magnets[0]
|
||||||
|
|
||||||
|
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||||
|
|
||||||
|
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to get magnet live status: {exc}")
|
||||||
|
|
||||||
|
def magnet_links(self, magnet_ids: list) -> Dict[str, Any]:
|
||||||
|
"""Get files and download links for one or more magnets.
|
||||||
|
|
||||||
|
Use this after magnet_status shows statusCode == 4 (Ready).
|
||||||
|
Returns the file tree structure with direct download links.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_ids: List of magnet IDs to get files for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict mapping magnet_id (as string) -> magnet_info:
|
||||||
|
- id: Magnet ID
|
||||||
|
- files: Array of file/folder objects
|
||||||
|
File: {n: name, s: size, l: direct_download_link}
|
||||||
|
Folder: {n: name, e: [sub_items]}
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If request fails
|
||||||
|
"""
|
||||||
|
if not magnet_ids:
|
||||||
|
raise AllDebridError("No magnet IDs provided")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Build parameter: id[]=123&id[]=456 style
|
||||||
|
params = {}
|
||||||
|
for i, magnet_id in enumerate(magnet_ids):
|
||||||
|
params[f'id[{i}]'] = str(magnet_id)
|
||||||
|
|
||||||
|
response = self._request('magnet/files', params)
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
magnets = data.get('magnets', [])
|
||||||
|
|
||||||
|
# Convert list to dict keyed by ID (as string) for easier access
|
||||||
|
result = {}
|
||||||
|
for magnet_info in magnets:
|
||||||
|
magnet_id = magnet_info.get('id')
|
||||||
|
if magnet_id:
|
||||||
|
result[str(magnet_id)] = magnet_info
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to get magnet files: {exc}")
|
||||||
|
|
||||||
|
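Putting magnet_add, magnet_status and magnet_links together roughly as the docstrings describe; the magnet URI and polling interval below are illustrative only:

client = AllDebridClient("MY_ALLDEBRID_KEY")
magnet_id = int(client.magnet_add("magnet:?xt=urn:btih:<hash>")["id"])

while True:
    status = client.magnet_status(magnet_id)
    code = status.get("statusCode", 0)
    if code == 4:          # Ready
        break
    if code >= 5:          # Error states
        raise AllDebridError(f"magnet failed: {status.get('status')}")
    time.sleep(5)

files = client.magnet_links([magnet_id])[str(magnet_id)].get("files", [])
for entry in files:
    if "l" in entry:       # plain file: {n, s, l}; folders nest children under "e"
        print(entry["n"], entry["l"])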
def instant_available(self, magnet_hash: str) -> Optional[List[Dict[str, Any]]]:
|
||||||
|
"""Check if magnet is available for instant streaming without downloading.
|
||||||
|
|
||||||
|
AllDebrid's "instant" feature checks if a magnet can be streamed directly
|
||||||
|
without downloading all the data. Returns available video/audio files.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_hash: Torrent hash (with or without magnet: prefix)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of available files for streaming, or None if not available
|
||||||
|
Each file: {n: name, s: size, e: extension, t: type}
|
||||||
|
Returns empty list if torrent not found or not available
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If API request fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Parse magnet hash if needed
|
||||||
|
if magnet_hash.startswith('magnet:'):
|
||||||
|
# Extract hash from magnet URI
|
||||||
|
import re
|
||||||
|
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
hash_value = match.group(1)
|
||||||
|
else:
|
||||||
|
hash_value = magnet_hash.strip()
|
||||||
|
|
||||||
|
if not hash_value or len(hash_value) < 32:
|
||||||
|
return None
|
||||||
|
|
||||||
|
response = self._request('magnet/instant', {'magnet': hash_value})
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
data = response.get('data', {})
|
||||||
|
# Returns 'files' array if available, or empty
|
||||||
|
return data.get('files', [])
|
||||||
|
|
||||||
|
# Not available is not an error, just return empty list
|
||||||
|
return []
|
||||||
|
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(f"[AllDebrid] instant_available check failed: {exc}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def magnet_delete(self, magnet_id: int) -> bool:
|
||||||
|
"""Delete a magnet from the AllDebrid account.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
magnet_id: Magnet ID to delete
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if deletion was successful
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AllDebridError: If deletion fails
|
||||||
|
"""
|
||||||
|
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||||
|
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self._request('magnet/delete', {'id': str(magnet_id)})
|
||||||
|
|
||||||
|
if response.get('status') == 'success':
|
||||||
|
return True
|
||||||
|
|
||||||
|
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||||
|
except AllDebridError:
|
||||||
|
raise
|
||||||
|
except Exception as exc:
|
||||||
|
raise AllDebridError(f"Failed to delete magnet: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_supported_hosters(api_key: str) -> Set[str]:
|
||||||
|
"""Get cached list of supported hoster domains.
|
||||||
|
|
||||||
|
Uses AllDebrid API to fetch the list once per hour,
|
||||||
|
caching the result to avoid repeated API calls.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
api_key: AllDebrid API key
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of supported domain names (lowercased)
|
||||||
|
"""
|
||||||
|
global _SUPPORTED_HOSTERS_CACHE, _CACHE_TIMESTAMP
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
|
# Return cached result if still valid
|
||||||
|
if _SUPPORTED_HOSTERS_CACHE is not None and (now - _CACHE_TIMESTAMP) < _CACHE_DURATION:
|
||||||
|
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||||
|
|
||||||
|
# Fetch fresh list from API
|
||||||
|
try:
|
||||||
|
client = AllDebridClient(api_key)
|
||||||
|
hosters_dict = client.get_supported_hosters()
|
||||||
|
|
||||||
|
if hosters_dict:
|
||||||
|
# API returns: hosts (list), streams (list), redirectors (list)
|
||||||
|
# Combine all into a single set
|
||||||
|
all_domains: Set[str] = set()
|
||||||
|
|
||||||
|
# Add hosts
|
||||||
|
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
|
||||||
|
all_domains.update(hosters_dict['hosts'])
|
||||||
|
|
||||||
|
# Add streams
|
||||||
|
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
|
||||||
|
all_domains.update(hosters_dict['streams'])
|
||||||
|
|
||||||
|
# Add redirectors
|
||||||
|
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
|
||||||
|
all_domains.update(hosters_dict['redirectors'])
|
||||||
|
|
||||||
|
# Cache as dict for consistency
|
||||||
|
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
|
||||||
|
_CACHE_TIMESTAMP = now
|
||||||
|
|
||||||
|
if all_domains:
|
||||||
|
debug(f"✓ Cached {len(all_domains)} supported hosters")
|
||||||
|
|
||||||
|
return all_domains
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠ Failed to fetch supported hosters: {exc}", file=sys.stderr)
|
||||||
|
# Return any cached hosters even if expired
|
||||||
|
if _SUPPORTED_HOSTERS_CACHE:
|
||||||
|
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||||
|
|
||||||
|
# Fallback: empty set if no cache available
|
||||||
|
return set()
|
||||||
|
|
||||||
|
|
||||||
|
def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
|
||||||
|
"""Check if a URL is from a hoster that AllDebrid can unlock.
|
||||||
|
|
||||||
|
Intelligently queries the AllDebrid API to detect if the URL is
|
||||||
|
from a supported restricted hoster.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: URL to check
|
||||||
|
api_key: AllDebrid API key
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if URL is from a supported restrictable hoster
|
||||||
|
"""
|
||||||
|
if not url or not api_key:
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Extract domain from URL
|
||||||
|
parsed = urlparse(url)
|
||||||
|
domain = parsed.netloc.lower()
|
||||||
|
|
||||||
|
# Remove www. prefix for comparison
|
||||||
|
if domain.startswith('www.'):
|
||||||
|
domain = domain[4:]
|
||||||
|
|
||||||
|
# Get supported hosters (cached)
|
||||||
|
supported = _get_cached_supported_hosters(api_key)
|
||||||
|
|
||||||
|
if not supported:
|
||||||
|
# API check failed, fall back to manual detection
|
||||||
|
# Check for common restricted hosters
|
||||||
|
common_hosters = {
|
||||||
|
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
|
||||||
|
'datafile.com', 'rapidfile.io', 'nitroflare.com',
|
||||||
|
'1fichier.com', 'mega.nz', 'mediafire.com'
|
||||||
|
}
|
||||||
|
return any(host in url.lower() for host in common_hosters)
|
||||||
|
|
||||||
|
# Check if domain is in supported list
|
||||||
|
# Need to check exact match and with/without www
|
||||||
|
return domain in supported or f"www.{domain}" in supported
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠ Hoster detection failed: {exc}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def convert_link_with_debrid(link: str, api_key: str) -> Optional[str]:
|
||||||
|
"""Convert a restricted link to a direct download URL using AllDebrid.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
link: Restricted link
|
||||||
|
api_key: AllDebrid API key
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Direct download URL, or original link if already unrestricted
|
||||||
|
"""
|
||||||
|
if not api_key:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = AllDebridClient(api_key)
|
||||||
|
direct_link = client.unlock_link(link)
|
||||||
|
|
||||||
|
if direct_link and direct_link != link:
|
||||||
|
debug(f"✓ Converted link: {link[:60]}... → {direct_link[:60]}...")
|
||||||
|
return direct_link
|
||||||
|
|
||||||
|
return None
|
||||||
|
except AllDebridError as exc:
|
||||||
|
log(f"⚠ Failed to convert link: {exc}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"⚠ Unexpected error: {exc}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
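The hoster check and the conversion helper above are typically chained; a sketch with placeholder values:

url = "https://uploadhaven.com/download/example"
if is_link_restrictable_hoster(url, api_key="MY_ALLDEBRID_KEY"):
    direct = convert_link_with_debrid(url, api_key="MY_ALLDEBRID_KEY")
    if direct:
        print(direct)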
def is_magnet_link(uri: str) -> bool:
|
||||||
|
"""Check if a URI is a magnet link.
|
||||||
|
|
||||||
|
Magnet links start with 'magnet:?xt=urn:btih:' or just 'magnet:'
|
||||||
|
|
||||||
|
Args:
|
||||||
|
uri: URI to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if URI is a magnet link
|
||||||
|
"""
|
||||||
|
if not uri:
|
||||||
|
return False
|
||||||
|
return uri.lower().startswith('magnet:')
|
||||||
|
|
||||||
|
|
||||||
|
def is_torrent_hash(text: str) -> bool:
|
||||||
|
"""Check if text looks like a torrent hash (40 or 64 hex characters).
|
||||||
|
|
||||||
|
Common formats:
|
||||||
|
- Info hash v1: 40 hex chars (SHA-1)
|
||||||
|
- Info hash v2: 64 hex chars (SHA-256)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: Text to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if text matches torrent hash format
|
||||||
|
"""
|
||||||
|
if not text or not isinstance(text, str):
|
||||||
|
return False
|
||||||
|
|
||||||
|
text = text.strip()
|
||||||
|
|
||||||
|
# Check if it's 40 hex chars (SHA-1) or 64 hex chars (SHA-256)
|
||||||
|
if len(text) not in (40, 64):
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Try to parse as hex
|
||||||
|
int(text, 16)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_torrent_file(path: str) -> bool:
|
||||||
|
"""Check if a file path is a .torrent file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: File path to check
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if file has .torrent extension
|
||||||
|
"""
|
||||||
|
if not path:
|
||||||
|
return False
|
||||||
|
return path.lower().endswith('.torrent')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_magnet_or_hash(uri: str) -> Optional[str]:
|
||||||
|
"""Parse a magnet URI or hash into a format for AllDebrid API.
|
||||||
|
|
||||||
|
AllDebrid's magnet/upload endpoint accepts:
|
||||||
|
- Full magnet URIs: magnet:?xt=urn:btih:...
|
||||||
|
- Info hashes: 40 or 64 hex characters
|
||||||
|
|
||||||
|
Args:
|
||||||
|
uri: Magnet URI or hash
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized input for AllDebrid API, or None if invalid
|
||||||
|
"""
|
||||||
|
if not uri:
|
||||||
|
return None
|
||||||
|
|
||||||
|
uri = uri.strip()
|
||||||
|
|
||||||
|
# Already a magnet link - just return it
|
||||||
|
if is_magnet_link(uri):
|
||||||
|
return uri
|
||||||
|
|
||||||
|
# Check if it's a valid hash
|
||||||
|
if is_torrent_hash(uri):
|
||||||
|
return uri
|
||||||
|
|
||||||
|
# Not a recognized format
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
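A quick illustration of what these parsing helpers accept (hash values are synthetic):

is_magnet_link("magnet:?xt=urn:btih:abcd")      # True
is_torrent_hash("a" * 40)                       # True, SHA-1 style info hash
is_torrent_hash("not-a-hash")                   # False
parse_magnet_or_hash("  " + "b" * 64 + "  ")    # returns the stripped 64-char hash
parse_magnet_or_hash("https://example.com/x")   # None, neither magnet nor hash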
# ============================================================================
|
||||||
|
# Cmdlet: unlock_link
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Unlock a restricted link using AllDebrid.
|
||||||
|
|
||||||
|
Converts free hosters and restricted links to direct download URLs.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
unlock-link <link>
|
||||||
|
unlock-link # Uses URL from pipeline result
|
||||||
|
|
||||||
|
Requires:
|
||||||
|
- AllDebrid API key in config under Debrid.All-debrid
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result: Pipeline result object
|
||||||
|
args: Command arguments
|
||||||
|
config: Configuration dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
0 on success, 1 on failure
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .link_utils import (
|
||||||
|
extract_link,
|
||||||
|
get_api_key,
|
||||||
|
add_direct_link_to_result,
|
||||||
|
)
|
||||||
|
except ImportError as e:
|
||||||
|
log(f"Required modules unavailable: {e}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Get link from args or result
|
||||||
|
link = extract_link(result, args)
|
||||||
|
|
||||||
|
if not link:
|
||||||
|
log("No valid URL provided", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Get AllDebrid API key from config
|
||||||
|
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
|
||||||
|
|
||||||
|
if not api_key:
|
||||||
|
log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Try to unlock the link
|
||||||
|
debug(f"Unlocking: {link}")
|
||||||
|
direct_link = convert_link_with_debrid(link, api_key)
|
||||||
|
|
||||||
|
if direct_link:
|
||||||
|
debug(f"✓ Direct link: {direct_link}")
|
||||||
|
|
||||||
|
# Update result with direct link
|
||||||
|
add_direct_link_to_result(result, direct_link, link)
|
||||||
|
|
||||||
|
# Return the updated result via pipeline context
|
||||||
|
# Note: The cmdlet wrapper will handle emitting to pipeline
|
||||||
|
return 0
|
||||||
|
else:
|
||||||
|
log(f"❌ Failed to unlock link or already unrestricted", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Cmdlet Registration
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def _register_unlock_link():
|
||||||
|
"""Register unlock-link command with cmdlet registry if available."""
|
||||||
|
try:
|
||||||
|
from cmdlets import register
|
||||||
|
|
||||||
|
@register(["unlock-link"])
|
||||||
|
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||||
|
"""Wrapper to make unlock_link_cmdlet available as cmdlet."""
|
||||||
|
import pipeline as ctx
|
||||||
|
|
||||||
|
ret_code = unlock_link_cmdlet(result, args, config)
|
||||||
|
|
||||||
|
# If successful, emit the result
|
||||||
|
if ret_code == 0:
|
||||||
|
ctx.emit(result)
|
||||||
|
|
||||||
|
return ret_code
|
||||||
|
|
||||||
|
return unlock_link_wrapper
|
||||||
|
except ImportError:
|
||||||
|
# If cmdlets module not available, just return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Register when module is imported
|
||||||
|
_unlock_link_registration = _register_unlock_link()
|
||||||
567
helper/archive_client.py
Normal file
@@ -0,0 +1,567 @@
|
|||||||
|
"""Archive.org API client for borrowing and downloading books.
|
||||||
|
|
||||||
|
This module provides low-level functions for interacting with Archive.org:
|
||||||
|
- Authentication (login, credential management)
|
||||||
|
- Borrowing (loan, return_loan)
|
||||||
|
- Book metadata extraction (get_book_infos, get_book_metadata)
|
||||||
|
- Image downloading and deobfuscation
|
||||||
|
- PDF creation with metadata
|
||||||
|
|
||||||
|
Used by unified_book_downloader.py for the borrowing workflow.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from concurrent import futures
|
||||||
|
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from helper.logger import log, debug
|
||||||
|
|
||||||
|
try:
|
||||||
|
from Crypto.Cipher import AES # type: ignore
|
||||||
|
from Crypto.Util import Counter # type: ignore
|
||||||
|
except ImportError:
|
||||||
|
AES = None # type: ignore
|
||||||
|
Counter = None # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
from tqdm import tqdm # type: ignore
|
||||||
|
except ImportError:
|
||||||
|
tqdm = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
"""Get OpenLibrary/Archive.org email and password from config.
|
||||||
|
|
||||||
|
Supports both formats:
|
||||||
|
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
||||||
|
- Old: {"Archive": {"email": "...", "password": "..."}}
|
||||||
|
{"archive_org_email": "...", "archive_org_password": "..."}
|
||||||
|
|
||||||
|
Returns: (email, password) tuple, each can be None
|
||||||
|
"""
|
||||||
|
if not isinstance(config, dict):
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Try new format first
|
||||||
|
provider_config = config.get("provider", {})
|
||||||
|
if isinstance(provider_config, dict):
|
||||||
|
openlibrary_config = provider_config.get("openlibrary", {})
|
||||||
|
if isinstance(openlibrary_config, dict):
|
||||||
|
email = openlibrary_config.get("email")
|
||||||
|
password = openlibrary_config.get("password")
|
||||||
|
if email or password:
|
||||||
|
return email, password
|
||||||
|
|
||||||
|
# Try old nested format
|
||||||
|
archive_config = config.get("Archive")
|
||||||
|
if isinstance(archive_config, dict):
|
||||||
|
email = archive_config.get("email")
|
||||||
|
password = archive_config.get("password")
|
||||||
|
if email or password:
|
||||||
|
return email, password
|
||||||
|
|
||||||
|
# Fall back to old flat format
|
||||||
|
email = config.get("archive_org_email")
|
||||||
|
password = config.get("archive_org_password")
|
||||||
|
return email, password
|
||||||
|
|
||||||
|
|
||||||
|
def display_error(response: requests.Response, message: str) -> None:
|
||||||
|
"""Display error and exit."""
|
||||||
|
log(message, file=sys.stderr)
|
||||||
|
log(response.text, file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def login(email: str, password: str) -> requests.Session:
|
||||||
|
"""Login to archive.org.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
email: Archive.org email
|
||||||
|
password: Archive.org password
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Authenticated requests.Session
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SystemExit on login failure
|
||||||
|
"""
|
||||||
|
session = requests.Session()
|
||||||
|
session.get("https://archive.org/account/login", timeout=30)
|
||||||
|
|
||||||
|
data = {"username": email, "password": password}
|
||||||
|
response = session.post("https://archive.org/account/login", data=data, timeout=30)
|
||||||
|
|
||||||
|
if "bad_login" in response.text:
|
||||||
|
log("Invalid credentials!", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
if "Successful login" in response.text:
|
||||||
|
debug("Successful login")
|
||||||
|
return session
|
||||||
|
display_error(response, "[-] Error while login:")
|
||||||
|
sys.exit(1) # Unreachable but satisfies type checker
|
||||||
|
|
||||||
|
|
||||||
|
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
|
||||||
|
"""Borrow a book from archive.org (14-day loan).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Authenticated requests.Session from login()
|
||||||
|
book_id: Archive.org book identifier (e.g., 'ia_book_id')
|
||||||
|
verbose: Whether to log messages
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Session with active loan
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SystemExit on loan failure
|
||||||
|
"""
|
||||||
|
data = {"action": "grant_access", "identifier": book_id}
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
|
||||||
|
data["action"] = "browse_book"
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
|
||||||
|
if response.status_code == 400:
|
||||||
|
try:
|
||||||
|
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
|
||||||
|
debug("This book doesn't need to be borrowed")
|
||||||
|
return session
|
||||||
|
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||||
|
        except Exception:
|
||||||
|
display_error(response, "The book cannot be borrowed")
|
||||||
|
|
||||||
|
data["action"] = "create_token"
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
|
||||||
|
if "token" in response.text:
|
||||||
|
if verbose:
|
||||||
|
debug("Successful loan")
|
||||||
|
return session
|
||||||
|
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||||
|
sys.exit(1) # Unreachable but satisfies type checker
|
||||||
|
|
||||||
|
|
||||||
|
def return_loan(session: requests.Session, book_id: str) -> None:
|
||||||
|
"""Return a borrowed book.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Authenticated requests.Session with active loan
|
||||||
|
book_id: Archive.org book identifier
|
||||||
|
"""
|
||||||
|
data = {"action": "return_loan", "identifier": book_id}
|
||||||
|
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||||
|
if response.status_code == 200 and response.json()["success"]:
|
||||||
|
debug("Book returned")
|
||||||
|
else:
|
||||||
|
display_error(response, "Something went wrong when trying to return the book")
|
||||||
|
|
||||||
|
|
||||||
|
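A sketch of the borrow/return flow these functions implement, using placeholder credentials and identifier; get_book_infos is defined just below:

session = login("user@example.com", "hunter2")          # exits on bad credentials
session = loan(session, "someidentifier")               # 14-day loan; no-op for freely readable books
try:
    title, links, metadata = get_book_infos(session, "https://archive.org/borrow/someidentifier")
    print(title, len(links), "pages")
finally:
    return_loan(session, "someidentifier")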
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
||||||
|
"""Extract book information and page links from archive.org viewer.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Authenticated requests.Session
|
||||||
|
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (title, page_links, metadata)
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
RuntimeError: If page data cannot be extracted
|
||||||
|
"""
|
||||||
|
r = session.get(url, timeout=30).text
|
||||||
|
|
||||||
|
# Try to extract the infos URL from the response
|
||||||
|
try:
|
||||||
|
# Look for the "url" field in the response
|
||||||
|
if '"url":"' not in r:
|
||||||
|
raise ValueError("No 'url' field found in response")
|
||||||
|
infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
|
||||||
|
except (IndexError, ValueError) as e:
|
||||||
|
# If URL extraction fails, raise with better error message
|
||||||
|
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
|
||||||
|
|
||||||
|
response = session.get(infos_url, timeout=30)
|
||||||
|
data = response.json()["data"]
|
||||||
|
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
|
||||||
|
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
|
||||||
|
title = title[:150] # Trim to avoid long file names
|
||||||
|
metadata = data["metadata"]
|
||||||
|
links = []
|
||||||
|
|
||||||
|
# Safely extract page links from brOptions data
|
||||||
|
try:
|
||||||
|
br_data = data.get("brOptions", {}).get("data", [])
|
||||||
|
for item in br_data:
|
||||||
|
if isinstance(item, list):
|
||||||
|
for page in item:
|
||||||
|
if isinstance(page, dict) and "uri" in page:
|
||||||
|
links.append(page["uri"])
|
||||||
|
elif isinstance(item, dict) and "uri" in item:
|
||||||
|
links.append(item["uri"])
|
||||||
|
except (KeyError, IndexError, TypeError) as e:
|
||||||
|
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
|
||||||
|
# Continue with whatever links we found
|
||||||
|
|
||||||
|
if len(links) > 1:
|
||||||
|
debug(f"Found {len(links)} pages")
|
||||||
|
return title, links, metadata
|
||||||
|
elif len(links) == 1:
|
||||||
|
debug(f"Found {len(links)} page")
|
||||||
|
return title, links, metadata
|
||||||
|
else:
|
||||||
|
log("Error while getting image links - no pages found", file=sys.stderr)
|
||||||
|
raise RuntimeError("No pages found in book data")
|
||||||
|
|
||||||
|
|
||||||
|
def image_name(pages: int, page: int, directory: str) -> str:
|
||||||
|
"""Generate image filename for page.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pages: Total number of pages
|
||||||
|
page: Current page number (0-indexed)
|
||||||
|
directory: Directory to save to
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Full path to image file
|
||||||
|
"""
|
||||||
|
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
||||||
|
|
||||||
|
|
||||||
|
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
|
||||||
|
"""Decrypt obfuscated image data using AES-CTR.
|
||||||
|
|
||||||
|
This handles Archive.org's image obfuscation for borrowed books.
|
||||||
|
Based on: https://github.com/justimm
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_data: Encrypted image bytes
|
||||||
|
link: Image URL (used to derive AES key)
|
||||||
|
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Decrypted image bytes
|
||||||
|
"""
|
||||||
|
if not AES or not Counter:
|
||||||
|
raise RuntimeError("Crypto library not available")
|
||||||
|
|
||||||
|
try:
|
||||||
|
version, counter_b64 = obf_header.split("|")
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError("Invalid X-Obfuscate header format") from e
|
||||||
|
|
||||||
|
if version != "1":
|
||||||
|
raise ValueError("Unsupported obfuscation version: " + version)
|
||||||
|
|
||||||
|
# Derive AES key from URL
|
||||||
|
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
||||||
|
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
|
||||||
|
key = sha1_digest[:16]
|
||||||
|
|
||||||
|
# Decode counter
|
||||||
|
counter_bytes = base64.b64decode(counter_b64)
|
||||||
|
if len(counter_bytes) != 16:
|
||||||
|
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
|
||||||
|
|
||||||
|
prefix = counter_bytes[:8]
|
||||||
|
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
||||||
|
|
||||||
|
# Create AES-CTR cipher
|
||||||
|
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
|
||||||
|
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
||||||
|
|
||||||
|
decrypted_part = cipher.decrypt(image_data[:1024])
|
||||||
|
new_data = decrypted_part + image_data[1024:]
|
||||||
|
return new_data
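# Hedged usage sketch, mirroring how download_one_image() below calls this:
# only the first 1024 bytes of the body are encrypted, the rest is copied as-is.
#   obf = response.headers.get("X-Obfuscate")   # e.g. "1|<base64 16-byte counter>"
#   data = deobfuscate_image(response.content, link, obf) if obf else response.content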
|
||||||
|
|
||||||
|
|
||||||
|
def download_one_image(
|
||||||
|
session: requests.Session,
|
||||||
|
link: str,
|
||||||
|
i: int,
|
||||||
|
directory: str,
|
||||||
|
book_id: str,
|
||||||
|
pages: int,
|
||||||
|
) -> None:
|
||||||
|
"""Download a single book page image.
|
||||||
|
|
||||||
|
Handles obfuscated images and re-borrowing on 403 errors.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Authenticated requests.Session
|
||||||
|
link: Direct image URL
|
||||||
|
i: Page index (0-based)
|
||||||
|
directory: Directory to save to
|
||||||
|
book_id: Archive.org book ID (for re-borrowing on 403)
|
||||||
|
pages: Total number of pages
|
||||||
|
"""
|
||||||
|
headers = {
|
||||||
|
"Referer": "https://archive.org/",
|
||||||
|
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||||
|
"Sec-Fetch-Site": "same-site",
|
||||||
|
"Sec-Fetch-Mode": "no-cors",
|
||||||
|
"Sec-Fetch-Dest": "image",
|
||||||
|
}
|
||||||
|
retry = True
|
||||||
|
response = None
|
||||||
|
while retry:
|
||||||
|
try:
|
||||||
|
response = session.get(link, headers=headers, timeout=30)
|
||||||
|
if response.status_code == 403:
|
||||||
|
session = loan(session, book_id, verbose=False)
|
||||||
|
raise Exception("Borrow again")
|
||||||
|
if response.status_code == 200:
retry = False
else:
time.sleep(1)  # avoid hammering the server on unexpected status codes
except Exception:
time.sleep(1)
|
||||||
|
|
||||||
|
image = image_name(pages, i, directory)
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
log(f"Failed to download page {i}", file=sys.stderr)
|
||||||
|
return
|
||||||
|
|
||||||
|
obf_header = response.headers.get("X-Obfuscate")
|
||||||
|
image_content = None
|
||||||
|
if obf_header:
|
||||||
|
try:
|
||||||
|
image_content = deobfuscate_image(response.content, link, obf_header)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Deobfuscation failed: {e}", file=sys.stderr)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
image_content = response.content
|
||||||
|
|
||||||
|
with open(image, "wb") as f:
|
||||||
|
f.write(image_content)
|
||||||
|
|
||||||
|
|
||||||
|
def download(
|
||||||
|
session: requests.Session,
|
||||||
|
n_threads: int,
|
||||||
|
directory: str,
|
||||||
|
links: List[str],
|
||||||
|
scale: int,
|
||||||
|
book_id: str,
|
||||||
|
) -> List[str]:
|
||||||
|
"""Download all book pages as images.
|
||||||
|
|
||||||
|
Uses thread pool for parallel downloads.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Authenticated requests.Session
|
||||||
|
n_threads: Number of download threads
|
||||||
|
directory: Directory to save images to
|
||||||
|
links: List of image URLs
|
||||||
|
scale: Image resolution (0=highest, 10=lowest)
|
||||||
|
book_id: Archive.org book ID (for re-borrowing)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of downloaded image file paths
|
||||||
|
"""
|
||||||
|
debug("Downloading pages...")
|
||||||
|
links = [f"{link}&rotate=0&scale={scale}" for link in links]
|
||||||
|
pages = len(links)
|
||||||
|
|
||||||
|
tasks = []
|
||||||
|
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
||||||
|
for i, link in enumerate(links):
|
||||||
|
tasks.append(
|
||||||
|
executor.submit(
|
||||||
|
download_one_image,
|
||||||
|
session=session,
|
||||||
|
link=link,
|
||||||
|
i=i,
|
||||||
|
directory=directory,
|
||||||
|
book_id=book_id,
|
||||||
|
pages=pages,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if tqdm:
|
||||||
|
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
for _ in futures.as_completed(tasks):
|
||||||
|
pass
|
||||||
|
|
||||||
|
images = [image_name(pages, i, directory) for i in range(len(links))]
|
||||||
|
return images
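# End-to-end sketch of the borrow-and-download flow. Credentials, the book id
# and the thread count are placeholders, and the target directory is assumed
# to exist before download() is called; the real CLI wiring is not shown here.
#   sess = login("user@example.com", "not-a-real-password")
#   sess = loan(sess, "some_book_id")
#   title, links, metadata = get_book_infos(sess, "https://archive.org/details/some_book_id")
#   images = download(sess, n_threads=4, directory=title, links=links, scale=0, book_id="some_book_id")
#   return_loan(sess, "some_book_id")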
|
||||||
|
|
||||||
|
|
||||||
|
def check_direct_download(book_id: str) -> Tuple[bool, str]:
|
||||||
|
"""Check if a book can be downloaded directly without borrowing.
|
||||||
|
|
||||||
|
Searches Archive.org metadata for downloadable PDF files.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
book_id: Archive.org book identifier
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (can_download: bool, pdf_url: str)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# First, try to get the metadata to find the actual PDF filename
|
||||||
|
metadata_url = f"https://archive.org/metadata/{book_id}"
|
||||||
|
response = requests.get(metadata_url, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
metadata = response.json()
|
||||||
|
|
||||||
|
# Find PDF file in files list
|
||||||
|
if "files" in metadata:
|
||||||
|
for file_info in metadata["files"]:
|
||||||
|
filename = file_info.get("name", "")
|
||||||
|
if filename.endswith(".pdf") and file_info.get("source") == "original":
|
||||||
|
# Found the original PDF
|
||||||
|
pdf_filename = filename
|
||||||
|
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
|
||||||
|
|
||||||
|
# Verify it's accessible
|
||||||
|
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
|
||||||
|
if check_response.status_code == 200:
|
||||||
|
return True, pdf_url
|
||||||
|
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error checking direct download: {e}", file=sys.stderr)
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
|
||||||
|
"""Fetch book data from OpenLibrary using ISBN.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
isbn: ISBN-10 or ISBN-13 to search for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with book metadata from OpenLibrary
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Try ISBN API first
|
||||||
|
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
|
||||||
|
response = requests.get(api_url, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
if data:
|
||||||
|
# Get first result
|
||||||
|
key = list(data.keys())[0]
|
||||||
|
return data[key]
|
||||||
|
return {}
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
|
||||||
|
return {}
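# Usage sketch (the ISBN is illustrative). The result follows OpenLibrary's
# "jscmd=data" shape, so fields such as "title" and "authors" may be missing
# and should be read defensively.
#   ol = get_openlibrary_by_isbn("9780140328721")
#   title = ol.get("title", "")
#   authors = [a.get("name", "") for a in ol.get("authors", [])]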
|
||||||
|
|
||||||
|
|
||||||
|
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
|
||||||
|
"""Extract ISBN from archive.org metadata.
|
||||||
|
|
||||||
|
Looks for ISBN in various metadata fields.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
metadata: Archive.org metadata dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ISBN string (clean, no hyphens) or empty string if not found
|
||||||
|
"""
|
||||||
|
# Try various common metadata fields
|
||||||
|
isbn_fields = [
|
||||||
|
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
|
||||||
|
"isbn-10", "isbn-13", "identifer_isbn"
|
||||||
|
]
|
||||||
|
|
||||||
|
for field in isbn_fields:
|
||||||
|
if field in metadata:
|
||||||
|
isbn_val = metadata[field]
|
||||||
|
if isinstance(isbn_val, list):
|
||||||
|
isbn_val = isbn_val[0] if isbn_val else None
|
||||||
|
if isbn_val and isinstance(isbn_val, str):
|
||||||
|
# Clean ISBN (remove hyphens, spaces)
|
||||||
|
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
|
||||||
|
if len(isbn_clean) in [10, 13]:
|
||||||
|
return isbn_clean
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_url(url: str) -> str:
|
||||||
|
"""Convert openlibrary.org URL to archive.org URL.
|
||||||
|
|
||||||
|
Looks up the actual Archive.org ID from OpenLibrary API.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Book URL (archive.org or openlibrary.org format)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized archive.org URL
|
||||||
|
"""
|
||||||
|
url = url.strip()
|
||||||
|
|
||||||
|
# Already archive.org format
|
||||||
|
if url.startswith("https://archive.org/details/"):
|
||||||
|
return url
|
||||||
|
|
||||||
|
# Convert openlibrary.org format by querying the OpenLibrary API
|
||||||
|
if "openlibrary.org/books/" in url:
|
||||||
|
try:
|
||||||
|
# Extract the book ID (e.g., OL6796852M)
|
||||||
|
parts = url.split("/books/")
|
||||||
|
if len(parts) > 1:
|
||||||
|
book_id = parts[1].split("/")[0]
|
||||||
|
|
||||||
|
# Query OpenLibrary API to get the book metadata
|
||||||
|
api_url = f"https://openlibrary.org/books/{book_id}.json"
|
||||||
|
response = requests.get(api_url, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
# Look for identifiers including internet_archive or ocaid
|
||||||
|
# First try ocaid (Open Content Alliance ID) - this is most common
|
||||||
|
if "ocaid" in data:
|
||||||
|
ocaid = data["ocaid"]
|
||||||
|
return f"https://archive.org/details/{ocaid}"
|
||||||
|
|
||||||
|
# Check for identifiers object
|
||||||
|
if "identifiers" in data:
|
||||||
|
identifiers = data["identifiers"]
|
||||||
|
|
||||||
|
# Look for internet_archive ID
|
||||||
|
if "internet_archive" in identifiers:
|
||||||
|
ia_ids = identifiers["internet_archive"]
|
||||||
|
if isinstance(ia_ids, list) and ia_ids:
|
||||||
|
ia_id = ia_ids[0]
|
||||||
|
else:
|
||||||
|
ia_id = ia_ids
|
||||||
|
return f"https://archive.org/details/{ia_id}"
|
||||||
|
|
||||||
|
# If no IA identifier found, use the book ID as fallback
|
||||||
|
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
|
||||||
|
return f"https://archive.org/details/{book_id}"
|
||||||
|
|
||||||
|
except requests.RequestException as e:
|
||||||
|
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
|
||||||
|
# Fallback to using the book ID directly
|
||||||
|
parts = url.split("/books/")
|
||||||
|
if len(parts) > 1:
|
||||||
|
book_id = parts[1].split("/")[0]
|
||||||
|
return f"https://archive.org/details/{book_id}"
|
||||||
|
except (KeyError, IndexError) as e:
|
||||||
|
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
|
||||||
|
# Fallback to using the book ID directly
|
||||||
|
parts = url.split("/books/")
|
||||||
|
if len(parts) > 1:
|
||||||
|
book_id = parts[1].split("/")[0]
|
||||||
|
return f"https://archive.org/details/{book_id}"
|
||||||
|
|
||||||
|
# Return original if can't parse
|
||||||
|
return url
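# Illustrative inputs/outputs (the OpenLibrary edition ID is a placeholder; the
# second result depends on what the OpenLibrary API reports for that edition):
#   normalize_url("https://archive.org/details/some_book_id")
#       -> "https://archive.org/details/some_book_id"  (returned unchanged)
#   normalize_url("https://openlibrary.org/books/OL12345M/Some_Title")
#       -> "https://archive.org/details/<ocaid or OL12345M>"  (via the API lookup)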
|
||||||
730
helper/download.py
Normal file
@@ -0,0 +1,730 @@
|
|||||||
|
"""Download media files using yt-dlp with support for direct file downloads.
|
||||||
|
|
||||||
|
Lean, focused downloader without event infrastructure overhead.
|
||||||
|
- yt-dlp integration for streaming sites
|
||||||
|
- Direct file download fallback for PDFs, images, documents
|
||||||
|
- Tag extraction via metadata.extract_ytdlp_tags()
|
||||||
|
- Logging via helper.logger.log()
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re # noqa: F401
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterator, List, Optional
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from helper.logger import log, debug
|
||||||
|
from .utils import ensure_directory, sha256_file
|
||||||
|
from .http_client import HTTPClient
|
||||||
|
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yt_dlp # type: ignore
|
||||||
|
from yt_dlp.extractor import gen_extractors # type: ignore
|
||||||
|
except Exception as exc:
|
||||||
|
yt_dlp = None # type: ignore
|
||||||
|
YTDLP_IMPORT_ERROR = exc
|
||||||
|
else:
|
||||||
|
YTDLP_IMPORT_ERROR = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from metadata import extract_ytdlp_tags
|
||||||
|
except ImportError:
|
||||||
|
extract_ytdlp_tags = None
|
||||||
|
|
||||||
|
_EXTRACTOR_CACHE: List[Any] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_yt_dlp_ready() -> None:
|
||||||
|
"""Verify yt-dlp is available, raise if not."""
|
||||||
|
if yt_dlp is not None:
|
||||||
|
return
|
||||||
|
detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
|
||||||
|
raise DownloadError(f"yt-dlp module not available: {detail}")
|
||||||
|
|
||||||
|
|
||||||
|
def _progress_callback(status: Dict[str, Any]) -> None:
|
||||||
|
"""Simple progress callback using logger."""
|
||||||
|
event = status.get("status")
|
||||||
|
if event == "downloading":
|
||||||
|
percent = status.get("_percent_str", "?")
|
||||||
|
speed = status.get("_speed_str", "?")
|
||||||
|
debug(f"Downloading {percent} at {speed}")
|
||||||
|
elif event == "finished":
|
||||||
|
debug(f"✓ Download finished: {status.get('filename')}")
|
||||||
|
elif event in ("postprocessing", "processing"):
|
||||||
|
debug(f"Post-processing: {status.get('postprocessor')}")
|
||||||
|
|
||||||
|
|
||||||
|
def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||||
|
"""Check if URL is supported by yt-dlp."""
|
||||||
|
if yt_dlp is None:
|
||||||
|
return False
|
||||||
|
global _EXTRACTOR_CACHE
|
||||||
|
if _EXTRACTOR_CACHE is None:
|
||||||
|
try:
|
||||||
|
_EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type]
|
||||||
|
except Exception:
|
||||||
|
_EXTRACTOR_CACHE = []
|
||||||
|
for extractor in _EXTRACTOR_CACHE:
|
||||||
|
try:
|
||||||
|
if not extractor.suitable(url):
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
name = getattr(extractor, "IE_NAME", "")
|
||||||
|
if name.lower() == "generic":
|
||||||
|
continue
|
||||||
|
return True
|
||||||
|
return False
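# Quick check used before handing a URL to yt-dlp; matches against the generic
# extractor are deliberately treated as unsupported so plain file links fall
# through to the direct downloader. Results depend on the installed yt-dlp:
#   is_url_supported_by_ytdlp("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # likely True
#   is_url_supported_by_ytdlp("https://example.com/files/report.pdf")         # likely False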
|
||||||
|
|
||||||
|
|
||||||
|
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
|
||||||
|
"""Get list of available formats for a URL using yt-dlp.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: URL to get formats for
|
||||||
|
no_playlist: If True, ignore playlists and list formats for single video
|
||||||
|
playlist_items: If specified, only list formats for these playlist items (e.g., "1,3,5-8")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of format dictionaries with keys: format_id, format, resolution, fps, vcodec, acodec, filesize, etc.
|
||||||
|
Returns None if format listing fails; raises DownloadError if yt-dlp is not installed.
|
||||||
|
"""
|
||||||
|
_ensure_yt_dlp_ready()
|
||||||
|
|
||||||
|
try:
|
||||||
|
ydl_opts = {
|
||||||
|
"quiet": False,
|
||||||
|
"no_warnings": False,
|
||||||
|
"socket_timeout": 30,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add no_playlist option if specified
|
||||||
|
if no_playlist:
|
||||||
|
ydl_opts["noplaylist"] = True
|
||||||
|
|
||||||
|
# Add playlist_items filter if specified
|
||||||
|
if playlist_items:
|
||||||
|
ydl_opts["playlist_items"] = playlist_items
|
||||||
|
|
||||||
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
debug(f"Fetching format list for: {url}")
|
||||||
|
info = ydl.extract_info(url, download=False)
|
||||||
|
|
||||||
|
formats = info.get("formats", [])
|
||||||
|
if not formats:
|
||||||
|
log("No formats available", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Parse and extract relevant format info
|
||||||
|
result_formats = []
|
||||||
|
for fmt in formats:
|
||||||
|
format_info = {
|
||||||
|
"format_id": fmt.get("format_id", ""),
|
||||||
|
"format": fmt.get("format", ""),
|
||||||
|
"ext": fmt.get("ext", ""),
|
||||||
|
"resolution": fmt.get("resolution", ""),
|
||||||
|
"width": fmt.get("width"),
|
||||||
|
"height": fmt.get("height"),
|
||||||
|
"fps": fmt.get("fps"),
|
||||||
|
"vcodec": fmt.get("vcodec", "none"),
|
||||||
|
"acodec": fmt.get("acodec", "none"),
|
||||||
|
"filesize": fmt.get("filesize"),
|
||||||
|
"tbr": fmt.get("tbr"), # Total bitrate
|
||||||
|
}
|
||||||
|
result_formats.append(format_info)
|
||||||
|
|
||||||
|
debug(f"Found {len(result_formats)} available formats")
|
||||||
|
return result_formats
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
|
||||||
|
return None
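# Usage sketch: print a compact table of formats for a single video (the URL is
# a placeholder; filesize can be None when the extractor does not report it).
#   fmts = list_formats("https://www.youtube.com/watch?v=dQw4w9WgXcQ", no_playlist=True)
#   for f in fmts or []:
#       print(f["format_id"], f["ext"], f["resolution"], f["filesize"])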
|
||||||
|
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||||
|
"""Build yt-dlp download options."""
|
||||||
|
ensure_directory(opts.output_dir)
|
||||||
|
|
||||||
|
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
|
||||||
|
|
||||||
|
base_options: Dict[str, Any] = {
|
||||||
|
"outtmpl": outtmpl,
|
||||||
|
"quiet": False,
|
||||||
|
"no_warnings": False,
|
||||||
|
"noprogress": False,
|
||||||
|
"socket_timeout": 30,
|
||||||
|
"retries": 10,
|
||||||
|
"fragment_retries": 10,
|
||||||
|
"http_chunk_size": 10_485_760,
|
||||||
|
"restrictfilenames": True,
|
||||||
|
"progress_hooks": [_progress_callback],
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.cookies_path and opts.cookies_path.is_file():
|
||||||
|
base_options["cookiefile"] = str(opts.cookies_path)
|
||||||
|
|
||||||
|
# Add no-playlist option if specified (for single video from playlist URLs)
|
||||||
|
if opts.no_playlist:
|
||||||
|
base_options["noplaylist"] = True
|
||||||
|
|
||||||
|
# Configure based on mode
|
||||||
|
if opts.mode == "audio":
|
||||||
|
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
|
||||||
|
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
|
||||||
|
else: # video
|
||||||
|
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
|
||||||
|
base_options["format_sort"] = [
|
||||||
|
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add clip sections if provided
|
||||||
|
if opts.clip_sections:
|
||||||
|
base_options["download_sections"] = opts.clip_sections
|
||||||
|
|
||||||
|
# Add playlist items selection if provided
|
||||||
|
if opts.playlist_items:
|
||||||
|
base_options["playlist_items"] = opts.playlist_items
|
||||||
|
|
||||||
|
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
|
||||||
|
return base_options
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
|
||||||
|
"""Iterate through download entries, handling playlists."""
|
||||||
|
queue: List[Dict[str, Any]] = [info]
|
||||||
|
seen: set[int] = set()
|
||||||
|
while queue:
|
||||||
|
current = queue.pop(0)
|
||||||
|
obj_id = id(current)
|
||||||
|
if obj_id in seen:
|
||||||
|
continue
|
||||||
|
seen.add(obj_id)
|
||||||
|
entries = current.get("entries")
|
||||||
|
if isinstance(entries, list):
|
||||||
|
for entry in entries:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
queue.append(entry)
|
||||||
|
if current.get("requested_downloads") or not entries:
|
||||||
|
yield current
|
||||||
|
|
||||||
|
|
||||||
|
def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
|
||||||
|
"""Get candidate file paths for downloaded media."""
|
||||||
|
requested = entry.get("requested_downloads")
|
||||||
|
if isinstance(requested, list):
|
||||||
|
for item in requested:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
for key in ("filepath", "_filename", "filename"):
|
||||||
|
value = item.get(key)
|
||||||
|
if value:
|
||||||
|
yield Path(value)
|
||||||
|
for key in ("filepath", "_filename", "filename"):
|
||||||
|
value = entry.get(key)
|
||||||
|
if value:
|
||||||
|
yield Path(value)
|
||||||
|
if entry.get("filename"):
|
||||||
|
yield output_dir / entry["filename"]
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
|
||||||
|
"""Find downloaded file in yt-dlp metadata."""
|
||||||
|
for entry in _iter_download_entries(info):
|
||||||
|
for candidate in _candidate_paths(entry, output_dir):
|
||||||
|
if candidate.is_file():
|
||||||
|
return entry, candidate
|
||||||
|
if not candidate.is_absolute():
|
||||||
|
resolved = output_dir / candidate
|
||||||
|
if resolved.is_file():
|
||||||
|
return entry, resolved
|
||||||
|
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||||
|
"""Extract SHA256 hash from yt-dlp metadata."""
|
||||||
|
for payload in [info] + (info.get("entries") or []):
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
continue
|
||||||
|
hashes = payload.get("hashes")
|
||||||
|
if isinstance(hashes, dict):
|
||||||
|
for key in ("sha256", "sha-256", "sha_256"):
|
||||||
|
value = hashes.get(key)
|
||||||
|
if isinstance(value, str) and value.strip():
|
||||||
|
return value.strip().lower()
|
||||||
|
for key in ("sha256", "sha-256", "sha_256"):
|
||||||
|
value = payload.get(key)
|
||||||
|
if isinstance(value, str) and value.strip():
|
||||||
|
return value.strip().lower()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||||
|
"""Extract the actual download link from LibGen redirect URL.
|
||||||
|
|
||||||
|
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to
|
||||||
|
actual mirror URLs. This follows the redirect chain to get the real file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
libgen_url: LibGen file.php URL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Actual download URL or None if extraction fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Check if this is a LibGen URL
|
||||||
|
parsed = urlparse(libgen_url)
|
||||||
|
if 'libgen' not in parsed.netloc.lower():
|
||||||
|
return None
|
||||||
|
|
||||||
|
if '/file.php' not in parsed.path.lower():
|
||||||
|
return None
|
||||||
|
|
||||||
|
# LibGen redirects to actual mirrors, follow redirects to get final URL
|
||||||
|
session = requests.Session()
|
||||||
|
session.headers.update({
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||||||
|
})
|
||||||
|
|
||||||
|
debug(f"Following LibGen redirect chain for: {libgen_url}")
|
||||||
|
|
||||||
|
# First, get the page and look for direct download link
|
||||||
|
try:
|
||||||
|
response = session.get(libgen_url, timeout=10, allow_redirects=True)
|
||||||
|
final_url = response.url
|
||||||
|
|
||||||
|
# Try to find actual download link in the page
|
||||||
|
try:
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
soup = BeautifulSoup(response.content, 'html.parser')
|
||||||
|
|
||||||
|
# Look for download links - LibGen typically has forms with download buttons
|
||||||
|
# Look for all links and forms that might lead to download
|
||||||
|
for link in soup.find_all('a'):
|
||||||
|
href = link.get('href')
|
||||||
|
if href and isinstance(href, str):
|
||||||
|
# Look for direct file links or get.php redirects
|
||||||
|
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
|
||||||
|
download_url = href if href.startswith('http') else urljoin(final_url, href)
|
||||||
|
debug(f"Found download link: {download_url}")
|
||||||
|
return download_url
|
||||||
|
except ImportError:
|
||||||
|
pass # BeautifulSoup not available
|
||||||
|
|
||||||
|
# If we followed redirects successfully, return the final URL
|
||||||
|
# This handles cases where libgen redirects to a direct download mirror
|
||||||
|
if final_url != libgen_url:
|
||||||
|
debug(f"LibGen resolved to mirror: {final_url}")
|
||||||
|
return final_url
|
||||||
|
|
||||||
|
except requests.RequestException as e:
|
||||||
|
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
|
||||||
|
# Try head request as fallback
|
||||||
|
try:
|
||||||
|
response = session.head(libgen_url, allow_redirects=True, timeout=10)
|
||||||
|
if response.url != libgen_url:
|
||||||
|
debug(f"LibGen HEAD resolved to: {response.url}")
|
||||||
|
return response.url
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _download_direct_file(
|
||||||
|
url: str,
|
||||||
|
output_dir: Path,
|
||||||
|
debug_logger: Optional[DebugLogger] = None,
|
||||||
|
) -> DownloadMediaResult:
|
||||||
|
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
|
||||||
|
ensure_directory(output_dir)
|
||||||
|
|
||||||
|
from urllib.parse import unquote, urlparse, parse_qs
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract filename from URL
|
||||||
|
parsed_url = urlparse(url)
|
||||||
|
url_path = parsed_url.path
|
||||||
|
|
||||||
|
# Try to get filename from query parameters first (for LibGen and similar services)
|
||||||
|
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
|
||||||
|
filename = None
|
||||||
|
if parsed_url.query:
|
||||||
|
query_params = parse_qs(parsed_url.query)
|
||||||
|
for param_name in ('filename', 'download', 'file', 'name'):
|
||||||
|
if param_name in query_params and query_params[param_name]:
|
||||||
|
filename = query_params[param_name][0]
|
||||||
|
filename = unquote(filename)
|
||||||
|
break
|
||||||
|
|
||||||
|
# If not found in query params, extract from URL path
|
||||||
|
if not filename or not filename.strip():
|
||||||
|
filename = url_path.split("/")[-1] if url_path else ""
|
||||||
|
filename = unquote(filename)
|
||||||
|
|
||||||
|
# Remove query strings from filename if any
|
||||||
|
if "?" in filename:
|
||||||
|
filename = filename.split("?")[0]
|
||||||
|
|
||||||
|
# Try to get real filename from Content-Disposition header (HEAD request)
|
||||||
|
try:
|
||||||
|
with HTTPClient(timeout=10.0) as client:
|
||||||
|
response = client.request("HEAD", url, follow_redirects=True)
|
||||||
|
content_disposition = response.headers.get("content-disposition", "")
|
||||||
|
if content_disposition:
|
||||||
|
# Extract filename from Content-Disposition header
|
||||||
|
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
|
||||||
|
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
|
||||||
|
if match:
|
||||||
|
extracted_name = match.group(1) or match.group(2)
|
||||||
|
if extracted_name:
|
||||||
|
filename = unquote(extracted_name)
|
||||||
|
debug(f"Filename from Content-Disposition: {filename}")
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Could not get filename from headers: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Fallback if we still don't have a good filename
|
||||||
|
if not filename or "." not in filename:
|
||||||
|
filename = "downloaded_file.bin"
|
||||||
|
|
||||||
|
file_path = output_dir / filename
|
||||||
|
progress_bar = ProgressBar()
|
||||||
|
|
||||||
|
debug(f"Direct download: {filename}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
start_time = time.time()
|
||||||
|
downloaded_bytes = [0]
|
||||||
|
total_bytes = [0]
|
||||||
|
last_progress_time = [start_time]
|
||||||
|
|
||||||
|
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
|
||||||
|
downloaded_bytes[0] = bytes_downloaded
|
||||||
|
total_bytes[0] = content_length
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
|
||||||
|
elapsed = now - start_time
|
||||||
|
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
|
||||||
|
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
|
||||||
|
eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0
|
||||||
|
|
||||||
|
speed_str = progress_bar.format_bytes(speed) + "/s"
|
||||||
|
minutes, seconds = divmod(int(eta_seconds), 60)
|
||||||
|
hours, minutes = divmod(minutes, 60)
|
||||||
|
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||||
|
|
||||||
|
progress_line = progress_bar.format_progress(
|
||||||
|
percent_str=f"{percent:.1f}%",
|
||||||
|
downloaded=bytes_downloaded,
|
||||||
|
total=content_length,
|
||||||
|
speed_str=speed_str,
|
||||||
|
eta_str=eta_str,
|
||||||
|
)
|
||||||
|
debug(progress_line)
|
||||||
|
last_progress_time[0] = now
|
||||||
|
|
||||||
|
with HTTPClient(timeout=30.0) as client:
|
||||||
|
client.download(url, str(file_path), progress_callback=progress_callback)
|
||||||
|
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
|
||||||
|
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
|
||||||
|
|
||||||
|
# For direct file downloads, create minimal info dict without filename as title
|
||||||
|
# This prevents creating duplicate title: tags when filename gets auto-generated
|
||||||
|
# We'll add title back later only if we couldn't extract meaningful tags
|
||||||
|
info = {
|
||||||
|
"id": filename.rsplit(".", 1)[0],
|
||||||
|
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
|
||||||
|
"webpage_url": url,
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_value = None
|
||||||
|
try:
|
||||||
|
hash_value = sha256_file(file_path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
tags = []
|
||||||
|
if extract_ytdlp_tags:
|
||||||
|
try:
|
||||||
|
tags = extract_ytdlp_tags(info)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
# Only use filename as a title tag if we couldn't extract any meaningful tags
|
||||||
|
# This prevents duplicate title: tags when the filename could be mistaken for metadata
|
||||||
|
if not any(t.startswith('title:') for t in tags):
|
||||||
|
# Re-extract tags with filename as title only if needed
|
||||||
|
info['title'] = filename
|
||||||
|
tags = []
|
||||||
|
if extract_ytdlp_tags:
|
||||||
|
try:
|
||||||
|
tags = extract_ytdlp_tags(info)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error extracting tags with filename: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"direct-file-downloaded",
|
||||||
|
{"url": url, "path": str(file_path), "hash": hash_value},
|
||||||
|
)
|
||||||
|
|
||||||
|
return DownloadMediaResult(
|
||||||
|
path=file_path,
|
||||||
|
info=info,
|
||||||
|
tags=tags,
|
||||||
|
source_url=url,
|
||||||
|
hash_value=hash_value,
|
||||||
|
)
|
||||||
|
|
||||||
|
except (httpx.HTTPError, httpx.RequestError) as exc:
|
||||||
|
log(f"Download error: {exc}", file=sys.stderr)
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"exception",
|
||||||
|
{"phase": "direct-file", "url": url, "error": str(exc)},
|
||||||
|
)
|
||||||
|
raise DownloadError(f"Failed to download {url}: {exc}") from exc
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Error downloading file: {exc}", file=sys.stderr)
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"exception",
|
||||||
|
{
|
||||||
|
"phase": "direct-file",
|
||||||
|
"url": url,
|
||||||
|
"error": str(exc),
|
||||||
|
"traceback": traceback.format_exc(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def probe_url(url: str, no_playlist: bool = False) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Probe URL to extract metadata WITHOUT downloading.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: URL to probe
|
||||||
|
no_playlist: If True, ignore playlists and probe only the single video
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with keys: extractor, title, entries (if playlist), duration, etc.
|
||||||
|
Returns None if not supported by yt-dlp.
|
||||||
|
"""
|
||||||
|
if not is_url_supported_by_ytdlp(url):
|
||||||
|
return None
|
||||||
|
|
||||||
|
_ensure_yt_dlp_ready()
|
||||||
|
|
||||||
|
assert yt_dlp is not None
|
||||||
|
try:
|
||||||
|
# Extract info without downloading
|
||||||
|
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||||
|
ydl_opts = {
|
||||||
|
"quiet": True, # Suppress all output
|
||||||
|
"no_warnings": True,
|
||||||
|
"socket_timeout": 10,
|
||||||
|
"retries": 3,
|
||||||
|
"skip_download": True, # Don't actually download
|
||||||
|
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||||
|
"noprogress": True, # No progress bars
|
||||||
|
"quiet": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add no_playlist option if specified
|
||||||
|
if no_playlist:
|
||||||
|
ydl_opts["noplaylist"] = True
|
||||||
|
|
||||||
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||||
|
info = ydl.extract_info(url, download=False)
|
||||||
|
|
||||||
|
if not isinstance(info, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Extract relevant fields
|
||||||
|
return {
|
||||||
|
"extractor": info.get("extractor", ""),
|
||||||
|
"title": info.get("title", ""),
|
||||||
|
"entries": info.get("entries", []), # Will be populated if playlist
|
||||||
|
"duration": info.get("duration"),
|
||||||
|
"uploader": info.get("uploader"),
|
||||||
|
"description": info.get("description"),
|
||||||
|
"url": url,
|
||||||
|
}
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"Probe failed for {url}: {exc}")
|
||||||
|
return None
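# Usage sketch: probe first to decide between single-item and playlist handling
# (the playlist URL is a placeholder).
#   meta = probe_url("https://www.youtube.com/playlist?list=PL_example")
#   if meta and meta["entries"]:
#       debug(f"{meta['extractor']}: playlist with {len(meta['entries'])} entries")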
|
||||||
|
|
||||||
|
|
||||||
|
def download_media(
|
||||||
|
opts: DownloadOptions,
|
||||||
|
*,
|
||||||
|
debug_logger: Optional[DebugLogger] = None,
|
||||||
|
) -> DownloadMediaResult:
|
||||||
|
"""Download media from URL using yt-dlp or direct HTTP download.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
opts: DownloadOptions with url, mode, output_dir, etc.
|
||||||
|
debug_logger: Optional debug logger for troubleshooting
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
DownloadMediaResult with path, info, tags, hash
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
DownloadError: If download fails
|
||||||
|
"""
|
||||||
|
# Handle LibGen URLs specially
|
||||||
|
# file.php redirects to mirrors, get.php is direct from modern API
|
||||||
|
if 'libgen' in opts.url.lower():
|
||||||
|
if '/get.php' in opts.url.lower():
|
||||||
|
# Modern API get.php links are direct downloads from mirrors (not file redirects)
|
||||||
|
log(f"Detected LibGen get.php URL, downloading directly...")
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record("libgen-direct", {"url": opts.url})
|
||||||
|
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||||
|
elif '/file.php' in opts.url.lower():
|
||||||
|
# Old-style file.php redirects to mirrors, we need to resolve
|
||||||
|
log(f"Detected LibGen file.php URL, resolving to actual mirror...")
|
||||||
|
actual_url = _get_libgen_download_url(opts.url)
|
||||||
|
if actual_url and actual_url != opts.url:
|
||||||
|
log(f"Resolved LibGen URL to mirror: {actual_url}")
|
||||||
|
if debug_logger is not None:
debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url})
opts.url = actual_url
# After resolution, this will typically be an onion link or direct file
# Skip yt-dlp for this (it won't support onion/mirrors), go direct
|
||||||
|
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||||
|
else:
|
||||||
|
log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
|
||||||
|
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||||
|
|
||||||
|
# Try yt-dlp first if URL is supported
|
||||||
|
if not is_url_supported_by_ytdlp(opts.url):
|
||||||
|
log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record("direct-file-attempt", {"url": opts.url})
|
||||||
|
return _download_direct_file(opts.url, opts.output_dir, debug_logger)
|
||||||
|
|
||||||
|
_ensure_yt_dlp_ready()
|
||||||
|
|
||||||
|
ytdl_options = _build_ytdlp_options(opts)
|
||||||
|
log(f"Starting yt-dlp download: {opts.url}")
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record("ytdlp-start", {"url": opts.url})
|
||||||
|
|
||||||
|
assert yt_dlp is not None
|
||||||
|
try:
|
||||||
|
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
|
||||||
|
info = ydl.extract_info(opts.url, download=True)
|
||||||
|
except Exception as exc:
|
||||||
|
log(f"yt-dlp failed: {exc}", file=sys.stderr)
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"exception",
|
||||||
|
{
|
||||||
|
"phase": "yt-dlp",
|
||||||
|
"error": str(exc),
|
||||||
|
"traceback": traceback.format_exc(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
raise DownloadError("yt-dlp download failed") from exc
|
||||||
|
|
||||||
|
if not isinstance(info, dict):
|
||||||
|
log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
|
||||||
|
raise DownloadError("Unexpected yt-dlp response type")
|
||||||
|
|
||||||
|
info_dict: Dict[str, Any] = info
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"ytdlp-info",
|
||||||
|
{
|
||||||
|
"keys": sorted(info_dict.keys()),
|
||||||
|
"is_playlist": bool(info_dict.get("entries")),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
log(f"Error: {exc}", file=sys.stderr)
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"exception",
|
||||||
|
{"phase": "resolve-path", "error": str(exc)},
|
||||||
|
)
|
||||||
|
raise DownloadError(str(exc)) from exc
|
||||||
|
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"resolved-media",
|
||||||
|
{"path": str(media_path), "entry_keys": sorted(entry.keys())},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract hash from metadata or compute
|
||||||
|
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
|
||||||
|
if not hash_value:
|
||||||
|
try:
|
||||||
|
hash_value = sha256_file(media_path)
|
||||||
|
except OSError as exc:
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"hash-error",
|
||||||
|
{"path": str(media_path), "error": str(exc)},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract tags using metadata.py
|
||||||
|
tags = []
|
||||||
|
if extract_ytdlp_tags:
|
||||||
|
try:
|
||||||
|
tags = extract_ytdlp_tags(entry)
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
source_url = (
|
||||||
|
entry.get("webpage_url")
|
||||||
|
or entry.get("original_url")
|
||||||
|
or entry.get("url")
|
||||||
|
)
|
||||||
|
|
||||||
|
log(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
|
||||||
|
if debug_logger is not None:
|
||||||
|
debug_logger.write_record(
|
||||||
|
"downloaded",
|
||||||
|
{
|
||||||
|
"path": str(media_path),
|
||||||
|
"tag_count": len(tags),
|
||||||
|
"source_url": source_url,
|
||||||
|
"sha256": hash_value,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
return DownloadMediaResult(
|
||||||
|
path=media_path,
|
||||||
|
info=entry,
|
||||||
|
tags=tags,
|
||||||
|
source_url=source_url,
|
||||||
|
hash_value=hash_value,
|
||||||
|
)
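# Hedged usage sketch. DownloadOptions is defined in models (not shown in this
# diff); the sketch assumes it is a dataclass-style container accepting the
# fields this module reads (url, mode, output_dir, plus optional ytdl_format,
# cookies_path, no_playlist, clip_sections, playlist_items).
#   opts = DownloadOptions(url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
#                          mode="audio", output_dir=Path("downloads"))
#   result = download_media(opts)
#   print(result.path, result.hash_value, len(result.tags))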
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"download_media",
|
||||||
|
"is_url_supported_by_ytdlp",
|
||||||
|
"DownloadError",
|
||||||
|
"DownloadOptions",
|
||||||
|
"DownloadMediaResult",
|
||||||
|
]
|
||||||
180
helper/file_server.py
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"""Simple HTTP file server for serving files in web mode."""
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import socket
|
||||||
|
import logging
|
||||||
|
from http.server import HTTPServer, SimpleHTTPRequestHandler
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import mimetypes
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Global server instance
|
||||||
|
_file_server: Optional[HTTPServer] = None
|
||||||
|
_server_thread: Optional[threading.Thread] = None
|
||||||
|
_server_port: int = 8001
|
||||||
|
|
||||||
|
|
||||||
|
class FileServerHandler(SimpleHTTPRequestHandler):
|
||||||
|
"""HTTP request handler for file serving."""
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
"""Handle GET requests."""
|
||||||
|
# Parse the path
|
||||||
|
parsed_path = urllib.parse.urlparse(self.path)
|
||||||
|
file_path = urllib.parse.unquote(parsed_path.path)
|
||||||
|
|
||||||
|
# Remove leading slash
|
||||||
|
if file_path.startswith('/'):
|
||||||
|
file_path = file_path[1:]
|
||||||
|
|
||||||
|
# Decode the file path (it's URL encoded)
|
||||||
|
try:
|
||||||
|
full_path = Path(file_path).resolve()
|
||||||
|
|
||||||
|
# Security check: ensure the path is within allowed directories
|
||||||
|
# For now, allow all paths (can be restricted later)
|
||||||
|
|
||||||
|
if full_path.is_file():
|
||||||
|
# Serve the file
|
||||||
|
logger.debug(f"Serving file: {full_path}")
|
||||||
|
|
||||||
|
# Determine content type
|
||||||
|
content_type, _ = mimetypes.guess_type(str(full_path))
|
||||||
|
if content_type is None:
|
||||||
|
content_type = 'application/octet-stream'
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(full_path, 'rb') as f:
|
||||||
|
file_content = f.read()
|
||||||
|
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-type', content_type)
|
||||||
|
self.send_header('Content-Length', str(len(file_content)))
|
||||||
|
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(file_content)
|
||||||
|
logger.info(f"Successfully served file: {full_path.name}")
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error serving file: {e}")
|
||||||
|
self.send_error(500, "Internal server error")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
logger.warning(f"File not found: {full_path}")
|
||||||
|
self.send_error(404, "File not found")
|
||||||
|
return
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error handling request: {e}")
|
||||||
|
self.send_error(400, "Bad request")
|
||||||
|
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
"""Override to use our logger instead of stderr."""
|
||||||
|
logger.debug(format % args)
|
||||||
|
|
||||||
|
|
||||||
|
def get_local_ip() -> Optional[str]:
|
||||||
|
"""Get the local IP address that's accessible from other devices."""
|
||||||
|
try:
|
||||||
|
# Connect to a remote server to determine local IP
|
||||||
|
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||||
|
s.connect(("8.8.8.8", 80))
|
||||||
|
ip = s.getsockname()[0]
|
||||||
|
s.close()
|
||||||
|
return ip
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to determine local IP: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def start_file_server(port: int = 8001) -> Optional[str]:
|
||||||
|
"""Start the HTTP file server.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
port: Port to serve on
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Server URL if successful, None otherwise
|
||||||
|
"""
|
||||||
|
global _file_server, _server_thread, _server_port
|
||||||
|
|
||||||
|
if _file_server is not None:
|
||||||
|
logger.debug(f"File server already running on port {_server_port}")
|
||||||
|
local_ip = get_local_ip()
|
||||||
|
if local_ip:
|
||||||
|
return f"http://{local_ip}:{_server_port}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
_server_port = port
|
||||||
|
|
||||||
|
# Create server
|
||||||
|
server_address = ('', port)
|
||||||
|
_file_server = HTTPServer(server_address, FileServerHandler)
|
||||||
|
|
||||||
|
# Start in daemon thread
|
||||||
|
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
|
||||||
|
_server_thread.start()
|
||||||
|
|
||||||
|
logger.info(f"File server started on port {port}")
|
||||||
|
|
||||||
|
# Get local IP
|
||||||
|
local_ip = get_local_ip()
|
||||||
|
if local_ip:
|
||||||
|
server_url = f"http://{local_ip}:{port}"
|
||||||
|
logger.info(f"File server accessible at: {server_url}")
|
||||||
|
return server_url
|
||||||
|
else:
|
||||||
|
logger.warning("Could not determine local IP")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to start file server: {e}")
|
||||||
|
_file_server = None
|
||||||
|
_server_thread = None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def stop_file_server():
|
||||||
|
"""Stop the HTTP file server."""
|
||||||
|
global _file_server, _server_thread
|
||||||
|
|
||||||
|
if _file_server is not None:
|
||||||
|
try:
|
||||||
|
_file_server.shutdown()
|
||||||
|
_file_server.server_close()
|
||||||
|
logger.info("File server stopped")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error stopping file server: {e}")
|
||||||
|
finally:
|
||||||
|
_file_server = None
|
||||||
|
_server_thread = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
|
||||||
|
"""Get the HTTP URL for a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to the file
|
||||||
|
server_url: Base server URL (gets determined if None)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
HTTP URL to the file, or None if server not running
|
||||||
|
"""
|
||||||
|
if not file_path.exists():
|
||||||
|
logger.warning(f"File does not exist: {file_path}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if server_url is None:
|
||||||
|
local_ip = get_local_ip()
|
||||||
|
if not local_ip:
|
||||||
|
logger.error("Cannot determine local IP for file URL")
|
||||||
|
return None
|
||||||
|
server_url = f"http://{local_ip}:{_server_port}"
|
||||||
|
|
||||||
|
# URL encode the file path
|
||||||
|
encoded_path = urllib.parse.quote(str(file_path.resolve()))
|
||||||
|
return f"{server_url}/{encoded_path}"
|
||||||
1039
helper/file_storage.py
Normal file
File diff suppressed because it is too large
579
helper/http_client.py
Normal file
@@ -0,0 +1,579 @@
|
|||||||
|
"""
|
||||||
|
Unified HTTP client for downlow using httpx.
|
||||||
|
|
||||||
|
Provides synchronous and asynchronous HTTP operations with:
|
||||||
|
- Automatic retries on transient failures
|
||||||
|
- Configurable timeouts and headers
|
||||||
|
- Built-in progress tracking for downloads
|
||||||
|
- Request/response logging support
|
||||||
|
"""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import asyncio
|
||||||
|
from typing import Optional, Dict, Any, Callable, BinaryIO
|
||||||
|
from pathlib import Path
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Default configuration
|
||||||
|
DEFAULT_TIMEOUT = 30.0
|
||||||
|
DEFAULT_RETRIES = 3
|
||||||
|
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPClient:
|
||||||
|
"""Unified HTTP client with sync support."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
timeout: float = DEFAULT_TIMEOUT,
|
||||||
|
retries: int = DEFAULT_RETRIES,
|
||||||
|
user_agent: str = DEFAULT_USER_AGENT,
|
||||||
|
verify_ssl: bool = True,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initialize HTTP client.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
timeout: Request timeout in seconds
|
||||||
|
retries: Number of retries on transient failures
|
||||||
|
user_agent: User-Agent header value
|
||||||
|
verify_ssl: Whether to verify SSL certificates
|
||||||
|
headers: Additional headers to include in all requests
|
||||||
|
"""
|
||||||
|
self.timeout = timeout
|
||||||
|
self.retries = retries
|
||||||
|
self.user_agent = user_agent
|
||||||
|
self.verify_ssl = verify_ssl
|
||||||
|
self.base_headers = headers or {}
|
||||||
|
self._client: Optional[httpx.Client] = None
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
"""Context manager entry."""
|
||||||
|
self._client = httpx.Client(
|
||||||
|
timeout=self.timeout,
|
||||||
|
verify=self.verify_ssl,
|
||||||
|
headers=self._get_headers(),
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
"""Context manager exit."""
|
||||||
|
if self._client:
|
||||||
|
self._client.close()
|
||||||
|
self._client = None
|
||||||
|
|
||||||
|
def _get_headers(self) -> Dict[str, str]:
|
||||||
|
"""Get request headers with user-agent."""
|
||||||
|
headers = {"User-Agent": self.user_agent}
|
||||||
|
headers.update(self.base_headers)
|
||||||
|
return headers
|
||||||
|
|
||||||
|
def get(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
params: Optional[Dict[str, Any]] = None,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
allow_redirects: bool = True,
|
||||||
|
) -> httpx.Response:
|
||||||
|
"""
|
||||||
|
Make a GET request.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Request URL
|
||||||
|
params: Query parameters
|
||||||
|
headers: Additional headers
|
||||||
|
allow_redirects: Follow redirects
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
httpx.Response object
|
||||||
|
"""
|
||||||
|
return self._request(
|
||||||
|
"GET",
|
||||||
|
url,
|
||||||
|
params=params,
|
||||||
|
headers=headers,
|
||||||
|
follow_redirects=allow_redirects,
|
||||||
|
)
|
||||||
|
|
||||||
|
def post(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: Optional[Any] = None,
|
||||||
|
json: Optional[Dict] = None,
|
||||||
|
files: Optional[Dict] = None,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
) -> httpx.Response:
|
||||||
|
"""
|
||||||
|
Make a POST request.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Request URL
|
||||||
|
data: Form data
|
||||||
|
json: JSON data
|
||||||
|
files: Files to upload
|
||||||
|
headers: Additional headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
httpx.Response object
|
||||||
|
"""
|
||||||
|
return self._request(
|
||||||
|
"POST",
|
||||||
|
url,
|
||||||
|
data=data,
|
||||||
|
json=json,
|
||||||
|
files=files,
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
|
||||||
|
def put(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: Optional[Any] = None,
|
||||||
|
json: Optional[Dict] = None,
|
||||||
|
content: Optional[Any] = None,
|
||||||
|
files: Optional[Dict] = None,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
) -> httpx.Response:
|
||||||
|
"""
|
||||||
|
Make a PUT request.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Request URL
|
||||||
|
data: Form data
|
||||||
|
json: JSON data
|
||||||
|
content: Raw content
|
||||||
|
files: Files to upload
|
||||||
|
headers: Additional headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
httpx.Response object
|
||||||
|
"""
|
||||||
|
return self._request(
|
||||||
|
"PUT",
|
||||||
|
url,
|
||||||
|
data=data,
|
||||||
|
json=json,
|
||||||
|
content=content,
|
||||||
|
files=files,
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
|
||||||
|
def delete(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
) -> httpx.Response:
|
||||||
|
"""
|
||||||
|
Make a DELETE request.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Request URL
|
||||||
|
headers: Additional headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
httpx.Response object
|
||||||
|
"""
|
||||||
|
return self._request(
|
||||||
|
"DELETE",
|
||||||
|
url,
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
|
||||||
|
def request(
|
||||||
|
self,
|
||||||
|
method: str,
|
||||||
|
url: str,
|
||||||
|
**kwargs
|
||||||
|
) -> httpx.Response:
|
||||||
|
"""
|
||||||
|
Make a generic HTTP request.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
method: HTTP method
|
||||||
|
url: Request URL
|
||||||
|
**kwargs: Additional arguments
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
httpx.Response object
|
||||||
|
"""
|
||||||
|
return self._request(method, url, **kwargs)
|
||||||
|
|
||||||
|
def download(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
file_path: str,
|
||||||
|
chunk_size: int = 8192,
|
||||||
|
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||||
|
headers: Optional[Dict[str, str]] = None,
|
||||||
|
) -> Path:
|
||||||
|
"""
|
||||||
|
Download a file from URL with optional progress tracking.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: File URL
|
||||||
|
file_path: Local file path to save to
|
||||||
|
chunk_size: Download chunk size
|
||||||
|
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||||
|
headers: Additional headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path object of downloaded file
|
||||||
|
"""
|
||||||
|
path = Path(file_path)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
|
||||||
|
response.raise_for_status()
|
||||||
|
total_bytes = int(response.headers.get("content-length", 0))
|
||||||
|
bytes_downloaded = 0
|
||||||
|
|
||||||
|
with open(path, "wb") as f:
|
||||||
|
for chunk in response.iter_bytes(chunk_size):
|
||||||
|
if chunk:
|
||||||
|
f.write(chunk)
|
||||||
|
bytes_downloaded += len(chunk)
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(bytes_downloaded, total_bytes)
|
||||||
|
|
||||||
|
return path
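    # Illustrative usage sketch (not part of the original module): one way a
    # caller might drive download() with a progress callback. The URL and the
    # destination path below are placeholders, not values used elsewhere here.
    #
    #     def show_progress(done: int, total: int) -> None:
    #         if total:
    #             print(f"{done * 100 // total}% downloaded", end="\r")
    #
    #     with HTTPClient(timeout=30.0) as client:
    #         saved = client.download(
    #             "https://example.com/data.bin",   # placeholder URL
    #             "downloads/data.bin",
    #             progress_callback=show_progress,
    #         )
    #         print(f"\nSaved to {saved}")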
    def _request(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> httpx.Response:
        """
        Make an HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.Client.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError("HTTPClient must be used with context manager (with statement)")

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        last_exception = None

        for attempt in range(self.retries):
            try:
                response = self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
                if attempt < self.retries - 1:
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except:
                        response_text = "<unable to read response>"
                    logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except:
                    response_text = "<unable to read response>"
                logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
                if attempt < self.retries - 1:
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
                if attempt < self.retries - 1:
                    continue

        if last_exception:
            logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
            raise last_exception

        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError("HTTPClient must be used with context manager (with statement)")

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        return self._client.stream(method, url, **kwargs)


class AsyncHTTPClient:
    """Unified async HTTP client with asyncio support."""

    def __init__(
        self,
        timeout: float = DEFAULT_TIMEOUT,
        retries: int = DEFAULT_RETRIES,
        user_agent: str = DEFAULT_USER_AGENT,
        verify_ssl: bool = True,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize async HTTP client.

        Args:
            timeout: Request timeout in seconds
            retries: Number of retries on transient failures
            user_agent: User-Agent header value
            verify_ssl: Whether to verify SSL certificates
            headers: Additional headers to include in all requests
        """
        self.timeout = timeout
        self.retries = retries
        self.user_agent = user_agent
        self.verify_ssl = verify_ssl
        self.base_headers = headers or {}
        self._client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self):
        """Async context manager entry."""
        self._client = httpx.AsyncClient(
            timeout=self.timeout,
            verify=self.verify_ssl,
            headers=self._get_headers(),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with user-agent."""
        headers = {"User-Agent": self.user_agent}
        headers.update(self.base_headers)
        return headers

    async def get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        allow_redirects: bool = True,
    ) -> httpx.Response:
        """
        Make an async GET request.

        Args:
            url: Request URL
            params: Query parameters
            headers: Additional headers
            allow_redirects: Follow redirects

        Returns:
            httpx.Response object
        """
        return await self._request(
            "GET",
            url,
            params=params,
            headers=headers,
            follow_redirects=allow_redirects,
        )

    async def post(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> httpx.Response:
        """
        Make an async POST request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return await self._request(
            "POST",
            url,
            data=data,
            json=json,
            headers=headers,
        )

    async def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int, int], None]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Path:
        """
        Download a file from URL asynchronously with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        async with self._request_stream("GET", url, headers=headers) as response:
            response.raise_for_status()
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0

            with open(path, "wb") as f:
                async for chunk in response.aiter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)

        return path
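    # Illustrative usage sketch (not part of the original module): the async
    # client is meant to be driven from a coroutine via its async context
    # manager. The URL below is a placeholder.
    #
    #     import asyncio
    #
    #     async def fetch_preview() -> str:
    #         async with AsyncHTTPClient(timeout=15.0) as client:
    #             response = await client.get("https://example.com")  # placeholder
    #             return response.text[:64]
    #
    #     asyncio.run(fetch_preview())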
    async def _request(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> httpx.Response:
        """
        Make an async HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.AsyncClient.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError("AsyncHTTPClient must be used with async context manager")

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        last_exception = None

        for attempt in range(self.retries):
            try:
                response = await self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)  # Brief delay before retry
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except:
                        response_text = "<unable to read response>"
                    logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except:
                    response_text = "<unable to read response>"
                logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue

        if last_exception:
            logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
            raise last_exception

        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError("AsyncHTTPClient must be used with async context manager")

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        return self._client.stream(method, url, **kwargs)


# Convenience function for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
    """Quick GET request without context manager."""
    with HTTPClient() as client:
        return client.get(url, **kwargs)


def post(url: str, **kwargs) -> httpx.Response:
    """Quick POST request without context manager."""
    with HTTPClient() as client:
        return client.post(url, **kwargs)


def download(
    url: str,
    file_path: str,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    **kwargs
) -> Path:
    """Quick file download without context manager."""
    with HTTPClient() as client:
        return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
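# Illustrative usage sketch (not part of the original module): each module-level
# helper opens a short-lived HTTPClient per call, so they suit one-off requests;
# for many requests, reusing a single HTTPClient context avoids repeated client
# setup. The URLs below are placeholders.
#
#     resp = get("https://example.com/api/status")                  # one-off GET
#     resp = post("https://example.com/api/submit", json={"ok": True})
#     path = download("https://example.com/file.zip", "out/file.zip")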
1553 helper/hydrus.py (Normal file): file diff suppressed because it is too large
377 helper/libgen_service.py (Normal file)
@@ -0,0 +1,377 @@
"""Shared Library Genesis search and download helpers."""
from __future__ import annotations

from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional
import logging
import requests
from urllib.parse import quote, urljoin

from libgen import search_sync, LibgenError

LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]

DEFAULT_TIMEOUT = 10.0
DEFAULT_LIMIT = 50

logging.getLogger(__name__).setLevel(logging.WARNING)


def _call(logger: LogFn, message: str) -> None:
    if logger:
        logger(message)


def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
    """Search Libgen without triggering ads.php requests."""
    try:
        from bs4 import BeautifulSoup
    except ImportError:  # pragma: no cover
        logging.warning("BeautifulSoup not available; falling back to standard search")
        return []

    mirrors = [
        "https://libgen.gl",
        "https://libgen.vg",
        "https://libgen.la",
        "https://libgen.bz",
        "https://libgen.gs",
    ]

    session = session or requests.Session()
    session.headers.setdefault(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    )

    for mirror in mirrors:
        try:
            search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
            response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
            if response.status_code != 200:
                continue

            soup = BeautifulSoup(response.content, "html.parser")
            table = soup.find("table", {"class": "catalog"})
            if table is None:
                for candidate in soup.find_all("table"):
                    rows = candidate.find_all("tr")
                    if len(rows) > 2:
                        table = candidate
                        break
            if table is None:
                logging.debug("[libgen_no_ads] No results table on %s", mirror)
                continue

            rows = table.find_all("tr")[1:]
            results: List[Dict[str, Any]] = []
            for row in rows:
                try:
                    cells = row.find_all("td")
                    if len(cells) < 9:
                        continue

                    size_cell = cells[7]
                    file_link = size_cell.find("a")
                    mirror_link = ""
                    if file_link:
                        href = str(file_link.get("href", ""))
                        if href.startswith("/"):
                            mirror_link = mirror + href
                        elif href:
                            mirror_link = urljoin(mirror, href)

                    if not mirror_link:
                        title_link = cells[1].find("a") if len(cells) > 1 else None
                        if title_link:
                            href = str(title_link.get("href", ""))
                            if href.startswith("/"):
                                mirror_link = mirror + href
                            elif href:
                                mirror_link = urljoin(mirror, href)

                    if not mirror_link:
                        continue

                    results.append(
                        {
                            "id": "",
                            "mirror": mirror_link,
                            "cover": "",
                            "title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
                            "authors": [cells[2].get_text(strip=True)]
                            if len(cells) > 2
                            else ["Unknown"],
                            "publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
                            "year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
                            "pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
                            "language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
                            "size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
                            "extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
                            "isbn": "",
                        }
                    )
                except Exception as exc:  # pragma: no cover - defensive
                    logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
                    continue

            if results:
                logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
                return results
        except Exception as exc:  # pragma: no cover - mirror issues
            logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
            continue

    return []


def format_book_info(book: Any) -> Dict[str, Any]:
    """Format Libgen search result into a consistent dictionary."""
    filesize_bytes = 0
    size_str = getattr(book, "size", "") or ""
    if size_str:
        parts = size_str.strip().split()
        try:
            value = float(parts[0])
            unit = parts[1].upper() if len(parts) > 1 else "B"
            if unit in {"MB", "M"}:
                filesize_bytes = int(value * 1024 * 1024)
            elif unit in {"GB", "G"}:
                filesize_bytes = int(value * 1024 * 1024 * 1024)
            elif unit in {"KB", "K"}:
                filesize_bytes = int(value * 1024)
            else:
                filesize_bytes = int(value)
        except (ValueError, IndexError):  # pragma: no cover - defensive
            filesize_bytes = 0

    title = getattr(book, "title", "") or ""
    isbn = getattr(book, "isbn", "") or ""
    if not isbn and title:
        import re

        match = re.search(
            r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
            title,
        )
        if match:
            potential_isbn = match.group(0).strip()
            if re.search(r"\d{10,13}", potential_isbn):
                isbn = potential_isbn
                title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)

    authors_value = getattr(book, "authors", None)
    if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
        authors_str = ", ".join(str(author) for author in authors_value)
    else:
        authors_str = str(authors_value or "Unknown")

    download_links = getattr(book, "download_links", None)
    mirror_url = None
    if download_links and getattr(download_links, "get_link", None):
        mirror_url = download_links.get_link

    return {
        "title": title or "Unknown",
        "author": authors_str,
        "publisher": getattr(book, "publisher", "") or "",
        "year": getattr(book, "year", "") or "",
        "pages": getattr(book, "pages", "") or "",
        "language": getattr(book, "language", "") or "",
        "filesize": filesize_bytes,
        "filesize_str": size_str or "Unknown",
        "extension": getattr(book, "extension", "") or "",
        "isbn": isbn,
        "mirror_url": mirror_url,
    }
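# Worked example (illustrative, not part of the original module) of the size
# parsing above: a Libgen size string such as "1.5 MB" splits into value 1.5 and
# unit "MB", giving int(1.5 * 1024 * 1024) == 1572864 bytes; "700 KB" gives
# int(700 * 1024) == 716800; an unrecognised unit falls back to int(value).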
def search_libgen(
    query: str,
    limit: int = DEFAULT_LIMIT,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
    """Search Libgen returning formatted dictionaries with multiple mirrors.

    Uses HTML scraper (search_libgen_no_ads) to find books quickly.
    Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
    """
    try:
        _call(log_info, f"[search] Searching Libgen for: {query}")
        session = session or requests.Session()

        # Use HTML scraper - more reliable and doesn't hang on mirror resolution
        _call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
        results: List[Any] = search_libgen_no_ads(query, session=session)

        if not results:
            _call(log_info, "[search] No results from HTML scraper")
            return []

        formatted: List[Dict[str, Any]] = []
        mirrors_list = [
            "https://libgen.gl",
            "https://libgen.vg",
            "https://libgen.la",
            "https://libgen.bz",
            "https://libgen.gs",
        ]

        for book in results[:limit]:
            if isinstance(book, dict):
                # Result from search_libgen_no_ads (HTML scraper)
                authors = book.get("authors", ["Unknown"])
                if isinstance(authors, list):
                    author_value = ", ".join(str(a) for a in authors)
                else:
                    author_value = str(authors)

                # Extract book ID from mirror URL if available
                mirror = book.get("mirror", "")
                book_id = ""
                if mirror and "/file.php?id=" in mirror:
                    try:
                        book_id = mirror.split("/file.php?id=")[1].split("&")[0]
                    except (IndexError, ValueError):
                        pass

                # Build list of alternative mirrors based on book ID
                mirrors_dict = {}
                if book_id:
                    for mirror_base in mirrors_list:
                        mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
                elif mirror:
                    # Fallback: use the mirror we found
                    mirrors_dict["primary"] = mirror

                formatted.append(
                    {
                        "title": book.get("title", "Unknown"),
                        "author": author_value,
                        "publisher": book.get("publisher", ""),
                        "year": book.get("year", ""),
                        "pages": book.get("pages", ""),
                        "language": book.get("language", ""),
                        "filesize": 0,
                        "filesize_str": book.get("size", "Unknown"),
                        "extension": book.get("extension", ""),
                        "isbn": book.get("isbn", ""),
                        "mirror_url": mirror,  # Primary mirror
                        "mirrors": mirrors_dict,  # Alternative mirrors
                        "book_id": book_id,
                    }
                )
            else:
                # Fallback: try to format as book object
                try:
                    formatted.append(format_book_info(book))
                except Exception:
                    pass

        _call(log_info, f"[search] Found {len(formatted)} result(s)")
        return formatted
    except LibgenError as exc:
        _call(log_error, f"[search] Libgen error: {exc}")
        return []
    except Exception as exc:  # pragma: no cover - defensive
        _call(log_error, f"[search] Error: {exc}")
        return []


def download_from_mirror(
    mirror_url: str,
    output_path: str | Path,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
) -> bool:
    """Download a Libgen file and write it to disk.

    Handles Libgen redirects and ensures proper file download by:
    - Following all redirects (default behavior)
    - Setting User-Agent header (required by some mirrors)
    - Validating that we're downloading binary content, not HTML
    - Attempting alternative download method if HTML is returned
    """
    session = session or requests.Session()
    try:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        _call(log_info, f"[download] Downloading from mirror: {mirror_url}")

        # Ensure session has proper headers for Libgen
        if 'User-Agent' not in session.headers:
            session.headers['User-Agent'] = (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )

        # Download with redirects enabled (default) and referer
        session.headers['Referer'] = 'https://libgen.gs/'
        response = session.get(mirror_url, stream=True, timeout=30, allow_redirects=True)
        response.raise_for_status()

        # Check if we got HTML instead of a file (common Libgen issue)
        content_type = response.headers.get('content-type', '').lower()
        if 'text/html' in content_type:
            _call(log_error, "[download] Server returned HTML. Trying alternative method...")

            # Try to extract file ID and use alternative CDN
            try:
                # Parse the HTML to extract MD5 or file ID
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.text, 'html.parser')

                # Look for download link in the HTML
                # Common patterns: md5 hash in form, or direct link in anchor tags
                download_link = None

                # Try to find forms that might contain download functionality
                forms = soup.find_all('form')
                for form in forms:
                    action = form.get('action', '')
                    if 'download' in action.lower() or 'get' in action.lower():
                        download_link = action
                        break

                if not download_link:
                    _call(log_error, "[download] Could not extract alternative download link from HTML")
                    return False

                _call(log_info, f"[download] Using alternative download method: {download_link[:100]}")
                # Try downloading from alternative link
                response2 = session.get(download_link, stream=True, timeout=30, allow_redirects=True)
                response2.raise_for_status()
                response = response2  # Use the new response

            except Exception as alt_error:
                _call(log_error, f"[download] Alternative method failed: {alt_error}")
                return False

        total_size = int(response.headers.get("content-length", 0))
        downloaded = 0

        with open(output_path, "wb") as handle:
            for chunk in response.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                handle.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    percent = downloaded / total_size * 100
                    _call(
                        log_info,
                        f"[download] {percent:.1f}% - {downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB",
                    )

        _call(log_info, f"[download] Downloaded successfully to: {output_path}")
        return True
    except Exception as exc:  # pragma: no cover - defensive
        _call(log_error, f"[download] Error: {exc}")
        return False
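# Illustrative end-to-end sketch (not part of the original module): search, then
# fetch the first hit through its primary mirror. The query and output path are
# placeholders; a real caller might iterate result["mirrors"] when a mirror fails.
#
#     hits = search_libgen("python programming", limit=5, log_info=print)
#     if hits and hits[0].get("mirror_url"):
#         download_from_mirror(
#             hits[0]["mirror_url"],
#             f"books/{hits[0]['title']}.{hits[0]['extension'] or 'bin'}",
#             log_info=print,
#             log_error=print,
#         )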
1395 helper/local_library.py (Normal file): file diff suppressed because it is too large
70 helper/logger.py (Normal file)
@@ -0,0 +1,70 @@
"""Unified logging utility for automatic file and function name tracking."""

import sys
import inspect
from pathlib import Path

_DEBUG_ENABLED = False


def set_debug(enabled: bool) -> None:
    """Enable or disable debug logging."""
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled


def debug(*args, **kwargs) -> None:
    """Print debug message if debug logging is enabled.

    Automatically prepends [filename.function_name] to all output.
    """
    if not _DEBUG_ENABLED:
        return

    # Set default to stderr for debug messages
    if 'file' not in kwargs:
        kwargs['file'] = sys.stderr

    # Prepend DEBUG label
    args = ("DEBUG:", *args)

    # Use the same logic as log()
    log(*args, **kwargs)


def log(*args, **kwargs) -> None:
    """Print with automatic file.function prefix.

    Automatically prepends [filename.function_name] to all output.
    Defaults to stdout if not specified.

    Example:
        log("Upload started")  # Output: [add_file.run] Upload started
    """
    # Get the calling frame
    frame = inspect.currentframe()
    if frame is None:
        print(*args, **kwargs)
        return

    caller_frame = frame.f_back
    if caller_frame is None:
        print(*args, **kwargs)
        return

    try:
        # Get file name without extension
        file_name = Path(caller_frame.f_code.co_filename).stem

        # Get function name
        func_name = caller_frame.f_code.co_name

        # Set default to stdout if not specified
        if 'file' not in kwargs:
            kwargs['file'] = sys.stdout

        # Build prefix
        prefix = f"[{file_name}.{func_name}]"

        # Print with prefix
        print(prefix, *args, **kwargs)
    finally:
        del frame
        del caller_frame
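# Illustrative usage sketch (not part of the original module): log() builds its
# prefix from the immediate caller's frame, and debug() only prints after
# set_debug(True). The prefix in the expected output depends on the calling file.
#
#     from helper.logger import log, debug, set_debug
#
#     def run() -> None:
#         log("Upload started")      # e.g. "[add_file.run] Upload started"
#         set_debug(True)
#         debug("payload built")     # written to stderr with a DEBUG: label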
951 helper/mpv_file.py (Normal file)
@@ -0,0 +1,951 @@
"""MPV file metadata aggregation helpers."""
from __future__ import annotations

import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence
from urllib.parse import parse_qs, urlparse, unquote

from config import get_hydrus_url
from helper.utils import sha256_file, unique_preserve_order
from helper.hydrus import HydrusClient, HydrusRequestError

import metadata


class MPVFileError(RuntimeError):
    """Raised when we cannot construct an MPV file snapshot."""


@dataclass(slots=True)
class DebridMagnet:
    """Represents a magnet result from AllDebrid search.

    This class matches the structure expected by the TUI (like Hydrus results)
    with title, target, media_kind attributes for compatibility.
    """
    magnet_id: str
    title: str
    size: int
    status_code: int
    status_text: str
    progress: float
    downloaded: int
    seeders: int
    dl_speed: int
    tag_summary: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None  # Complete magnet file metadata from AllDebrid API

    @property
    def target(self) -> str:
        """Return the target URI for this magnet (used by TUI for access operations)."""
        return f"alldebrid://{self.magnet_id}"

    @property
    def media_kind(self) -> str:
        """Return media kind for display."""
        return "magnet"

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for metadata display."""
        return {
            "magnet_id": self.magnet_id,
            "title": self.title,
            "size": self.size,
            "status_code": self.status_code,
            "status_text": self.status_text,
            "progress": f"{self.progress:.1f}%",
            "downloaded": self.downloaded,
            "seeders": self.seeders,
            "dl_speed": self.dl_speed,
        }


@dataclass(slots=True)
class HydrusSettings:
    base_url: Optional[str]
    access_key: Optional[str]
    timeout: float
    prefer_service_name: Optional[str]
    include_relationships: bool

    def as_metadata_options(self) -> Dict[str, Any]:
        options: Dict[str, Any] = {
            "timeout": self.timeout,
            "include_relationships": self.include_relationships,
        }
        if self.prefer_service_name:
            options["prefer_service_name"] = self.prefer_service_name
        return options


@dataclass(slots=True)
class MPVfile:
    path: Optional[str] = None
    filename: Optional[str] = None
    type: str = "unknown"
    hash: Optional[str] = None
    local_path: Optional[str] = None
    mpv_metadata: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)
    remote_metadata: Optional[Dict[str, Any]] = None
    relationships: Optional[Dict[str, Any]] = None
    relationship_metadata: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    original_tags: Dict[str, str] = field(default_factory=dict)
    known_urls: List[str] = field(default_factory=list)
    title: Optional[str] = None
    source_url: Optional[str] = None
    clip_time: Optional[str] = None
    duration: Optional[float] = None
    filesize_mb: Optional[float] = None
    is_video: bool = False
    is_audio: bool = False
    is_deleted: Optional[bool] = None
    is_local: Optional[bool] = None
    has_current_file_service: Optional[bool] = None
    tag_service_key: Optional[str] = None
    swap_recommended: bool = False
    warnings: List[str] = field(default_factory=list)
    # New relationship fields for menu
    king: Optional[str] = None
    alts: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        payload: Dict[str, Any] = {
            "path": self.path,
            "filename": self.filename,
            "type": self.type,
            "hash": self.hash,
            "local_path": self.local_path,
            "mpv_metadata": self.mpv_metadata,
            "metadata": self.metadata,
            "remote_metadata": self.remote_metadata,
            "relationships": self.relationships,
            "relationship_metadata": self.relationship_metadata,
            "tags": self.tags,
            "original_tags": self.original_tags,
            "known_urls": self.known_urls,
            "title": self.title,
            "source_url": self.source_url,
            "clip_time": self.clip_time,
            "duration": self.duration,
            "filesize_mb": self.filesize_mb,
            "is_video": self.is_video,
            "is_audio": self.is_audio,
            "is_deleted": self.is_deleted,
            "is_local": self.is_local,
            "has_current_file_service": self.has_current_file_service,
            "tag_service_key": self.tag_service_key,
            "swap_recommended": self.swap_recommended,
            "warnings": self.warnings,
            # relationship summary fields for easier Lua consumption
            "king": self.king,
            "alts": self.alts,
        }
        # Remove empty optional values for terser payloads.
        for key in list(payload.keys()):
            value = payload[key]
            if value in (None, [], {}, ""):
                del payload[key]
        return payload
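    # Illustrative note (not part of the original module): to_dict() prunes keys
    # whose values are None, [], {}, or "", so a freshly constructed MPVfile
    # serialises to just its non-empty fields plus the False booleans.
    #
    #     snapshot = MPVfile(path="C:/clips/a.mp4", tags=["demo"])
    #     snapshot.to_dict()
    #     # -> {"path": "C:/clips/a.mp4", "type": "unknown", "tags": ["demo"],
    #     #     "is_video": False, "is_audio": False, "swap_recommended": False}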

def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
    if not values:
        return []
    seen: set[str] = set()
    result: List[str] = []
    for value in values:
        if value is None:
            continue
        text = str(value).strip()
        if not text or text in seen:
            continue
        seen.add(text)
        result.append(text)
    return result


def _looks_like_hash(value: Optional[str]) -> bool:
    if not value:
        return False
    candidate = value.strip().lower()
    return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)


class MPVFileBuilder:
|
||||||
|
def __init__(self, payload: Dict[str, Any], config: Dict[str, Any]):
|
||||||
|
self.payload = payload or {}
|
||||||
|
self.config = config or {}
|
||||||
|
self.state = MPVfile()
|
||||||
|
self.hydrus_settings = self._resolve_hydrus_settings()
|
||||||
|
self.remote_options = self._resolve_remote_options()
|
||||||
|
self.include_relationships = bool(self.payload.get("include_relationships", True))
|
||||||
|
self.last_url = self._normalise_url(self.payload.get("last_url"))
|
||||||
|
self._initialise_identity()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# public API
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build(self) -> Dict[str, Any]:
|
||||||
|
if self.state.type == "hydrus":
|
||||||
|
self._populate_hydrus_by_hash()
|
||||||
|
elif self.state.type == "local":
|
||||||
|
self._populate_local()
|
||||||
|
elif self.state.type == "remote":
|
||||||
|
self._populate_remote()
|
||||||
|
else:
|
||||||
|
# Attempt best effort resolution even for unknown types.
|
||||||
|
self._populate_local(best_effort=True)
|
||||||
|
self._finalise()
|
||||||
|
result = self.state.to_dict()
|
||||||
|
# Append King and Alts info to mpv_metadata for info menu
|
||||||
|
king = self.state.king
|
||||||
|
alts = self.state.alts
|
||||||
|
if king:
|
||||||
|
result.setdefault("mpv_metadata", {})["King"] = king
|
||||||
|
if alts:
|
||||||
|
result.setdefault("mpv_metadata", {})["Alts"] = ", ".join(alts)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# configuration helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _resolve_hydrus_settings(self) -> HydrusSettings:
|
||||||
|
overrides = self.payload.get("hydrus")
|
||||||
|
overrides = overrides if isinstance(overrides, dict) else {}
|
||||||
|
base_url = overrides.get("url") or overrides.get("base_url")
|
||||||
|
access_key = overrides.get("access_key")
|
||||||
|
timeout_raw = overrides.get("timeout") or overrides.get("hydrus_timeout")
|
||||||
|
prefer_service = overrides.get("prefer_service_name")
|
||||||
|
include_relationships = overrides.get("include_relationships")
|
||||||
|
if base_url is None:
|
||||||
|
base_url = get_hydrus_url(self.config)
|
||||||
|
if access_key is None:
|
||||||
|
raw_key = self.config.get("HydrusNetwork_Access_Key")
|
||||||
|
access_key = str(raw_key) if raw_key is not None else None
|
||||||
|
if timeout_raw is None:
|
||||||
|
timeout_raw = self.config.get("HydrusNetwork_Request_Timeout")
|
||||||
|
try:
|
||||||
|
timeout = float(timeout_raw) if timeout_raw is not None else 60.0
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
timeout = 60.0
|
||||||
|
if prefer_service is None:
|
||||||
|
prefer_service = self.config.get("Hydrus_Tag_Service")
|
||||||
|
if isinstance(prefer_service, str):
|
||||||
|
prefer_service = prefer_service.strip() or None
|
||||||
|
if include_relationships is None:
|
||||||
|
include_relationships = self.payload.get("include_relationships")
|
||||||
|
include_relationships = bool(True if include_relationships is None else include_relationships)
|
||||||
|
base_url = base_url.strip() if isinstance(base_url, str) else None
|
||||||
|
access_key = access_key.strip() if isinstance(access_key, str) else None
|
||||||
|
return HydrusSettings(
|
||||||
|
base_url=base_url or None,
|
||||||
|
access_key=access_key or None,
|
||||||
|
timeout=timeout,
|
||||||
|
prefer_service_name=prefer_service,
|
||||||
|
include_relationships=include_relationships,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _resolve_remote_options(self) -> Dict[str, Any]:
|
||||||
|
remote_payload = self.payload.get("remote")
|
||||||
|
remote_payload = remote_payload if isinstance(remote_payload, dict) else {}
|
||||||
|
options = remote_payload.get("options")
|
||||||
|
options = options if isinstance(options, dict) else {}
|
||||||
|
ytdlp_args = options.get("ytdlp_args")
|
||||||
|
if not ytdlp_args:
|
||||||
|
options["ytdlp_args"] = ["--no-playlist", "--skip-download", "--no-warnings"]
|
||||||
|
existing_timeout = options.get("timeout")
|
||||||
|
if existing_timeout is None:
|
||||||
|
options["timeout"] = min(90.0, max(10.0, float(self.payload.get("remote_timeout") or 45.0)))
|
||||||
|
return options
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# initialisation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _initialise_identity(self) -> None:
|
||||||
|
s = self.state
|
||||||
|
p = self.payload
|
||||||
|
|
||||||
|
def _str_or_none(v):
|
||||||
|
return str(v) if v is not None and v != "" else None
|
||||||
|
|
||||||
|
def _copy_dict_if_dict(v):
|
||||||
|
return dict(v) if isinstance(v, dict) else {}
|
||||||
|
|
||||||
|
# path and filename
|
||||||
|
s.path = _str_or_none(p.get("path"))
|
||||||
|
s.filename = _str_or_none(p.get("filename"))
|
||||||
|
|
||||||
|
# mpv metadata
|
||||||
|
s.mpv_metadata = _copy_dict_if_dict(p.get("mpv_metadata"))
|
||||||
|
|
||||||
|
# tags (support both "tags" and legacy "existing_tags")
|
||||||
|
existing_tags = p.get("tags") or p.get("existing_tags")
|
||||||
|
s.tags = _normalise_string_list(existing_tags)
|
||||||
|
if s.tags:
|
||||||
|
s.original_tags = {tag: tag for tag in s.tags}
|
||||||
|
|
||||||
|
# known URLs + last_url
|
||||||
|
s.known_urls = _normalise_string_list(p.get("known_urls"))
|
||||||
|
if self.last_url and self.last_url not in s.known_urls:
|
||||||
|
s.known_urls.append(self.last_url)
|
||||||
|
|
||||||
|
# source URL (explicit or fallback to last_url)
|
||||||
|
explicit_source = p.get("source_url")
|
||||||
|
s.source_url = self._normalise_url(explicit_source) or self.last_url
|
||||||
|
|
||||||
|
# hash (validate looks-like-hash)
|
||||||
|
hash_candidate = p.get("hash")
|
||||||
|
if isinstance(hash_candidate, str):
|
||||||
|
candidate = hash_candidate.strip().lower()
|
||||||
|
if _looks_like_hash(candidate):
|
||||||
|
s.hash = candidate
|
||||||
|
|
||||||
|
# local_path (non-empty string)
|
||||||
|
local_path_override = p.get("local_path")
|
||||||
|
if isinstance(local_path_override, str):
|
||||||
|
lp = local_path_override.strip()
|
||||||
|
if lp:
|
||||||
|
s.local_path = lp
|
||||||
|
|
||||||
|
# derive remaining fields from path/filename/type
|
||||||
|
self._derive_filename_from_path()
|
||||||
|
self._determine_type()
|
||||||
|
|
||||||
|
|
||||||
|
def _derive_filename_from_path(self) -> None:
|
||||||
|
if self.state.filename or not self.state.path:
|
||||||
|
return
|
||||||
|
parsed = urlparse(self.state.path)
|
||||||
|
if parsed.scheme in ("http", "https", "ytdl") and parsed.path:
|
||||||
|
candidate = Path(parsed.path).name
|
||||||
|
if candidate:
|
||||||
|
self.state.filename = candidate
|
||||||
|
elif parsed.scheme == "file":
|
||||||
|
decoded = self._decode_file_url(self.state.path)
|
||||||
|
if decoded:
|
||||||
|
self.state.filename = Path(decoded).name
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self.state.filename = Path(self.state.path).name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _determine_type(self) -> None:
|
||||||
|
s = self.state
|
||||||
|
p = self.payload
|
||||||
|
|
||||||
|
def _set_local_from_path(pth: str | None):
|
||||||
|
if not pth:
|
||||||
|
return
|
||||||
|
# Prefer resolved local path when available
|
||||||
|
resolved = self._resolve_local_path(pth)
|
||||||
|
s.local_path = resolved if resolved else pth
|
||||||
|
s.type = "local"
|
||||||
|
|
||||||
|
# 1) Respect explicit type when valid
|
||||||
|
explicit = p.get("type")
|
||||||
|
if isinstance(explicit, str):
|
||||||
|
lowered = explicit.strip().lower()
|
||||||
|
if lowered in {"local", "hydrus", "remote"}:
|
||||||
|
s.type = lowered
|
||||||
|
if lowered == "local":
|
||||||
|
s.local_path = self._resolve_local_path(s.path)
|
||||||
|
return
|
||||||
|
|
||||||
|
# 2) Work from path
|
||||||
|
path = s.path or ""
|
||||||
|
if not path:
|
||||||
|
s.type = "unknown"
|
||||||
|
return
|
||||||
|
|
||||||
|
# 3) Hydrus-specific quick checks
|
||||||
|
if self._looks_like_hydrus_url(path):
|
||||||
|
s.type = "hydrus"
|
||||||
|
return
|
||||||
|
|
||||||
|
parsed = urlparse(path)
|
||||||
|
scheme = (parsed.scheme or "").lower()
|
||||||
|
|
||||||
|
# 4) scheme-based handling
|
||||||
|
if scheme == "hydrus":
|
||||||
|
s.type = "hydrus"
|
||||||
|
return
|
||||||
|
|
||||||
|
if scheme in {"http", "https", "rtmp", "rtsp", "magnet", "ytdl"}:
|
||||||
|
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||||
|
return
|
||||||
|
|
||||||
|
if scheme == "file":
|
||||||
|
decoded = self._decode_file_url(path)
|
||||||
|
if decoded:
|
||||||
|
s.local_path = decoded
|
||||||
|
s.type = "local"
|
||||||
|
return
|
||||||
|
|
||||||
|
# 5) Windows/UNC absolute paths
|
||||||
|
if re.match(r"^[A-Za-z]:[\\/]", path) or path.startswith(("\\\\", "//")):
|
||||||
|
s.type = "local"
|
||||||
|
s.local_path = path
|
||||||
|
return
|
||||||
|
|
||||||
|
# 6) Fallback: if it looks like a URL with a scheme separator treat as remote/hydrus
|
||||||
|
if "://" in path:
|
||||||
|
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||||
|
return
|
||||||
|
|
||||||
|
# 7) Otherwise treat as a local path
|
||||||
|
_set_local_from_path(path)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# population helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _populate_local(self, best_effort: bool = False) -> None:
|
||||||
|
local_path = self.state.local_path or self._resolve_local_path(self.state.path)
|
||||||
|
if local_path:
|
||||||
|
self.state.local_path = local_path
|
||||||
|
self._load_sidecar_tags(local_path)
|
||||||
|
if not self.state.hash:
|
||||||
|
self._compute_local_hash(local_path)
|
||||||
|
# If Hydrus is configured and we have a hash, enrich from Hydrus; otherwise keep local tags only
|
||||||
|
if self.state.hash and self.hydrus_settings.base_url and self.hydrus_settings.access_key:
|
||||||
|
self._populate_hydrus_by_hash()
|
||||||
|
elif best_effort and self.hydrus_settings.base_url and self.state.source_url and self.hydrus_settings.access_key:
|
||||||
|
self._populate_hydrus_by_url(self.state.source_url)
|
||||||
|
|
||||||
|
# (helpers for resolving local path and loading sidecars already exist below)
|
||||||
|
|
||||||
|
def _populate_remote(self) -> None:
|
||||||
|
source_url = self.state.source_url or self.last_url or self.state.path
|
||||||
|
source_url = self._normalise_url(source_url)
|
||||||
|
if source_url:
|
||||||
|
self.state.source_url = source_url
|
||||||
|
remote_payload = {
|
||||||
|
"source_url": self.state.source_url,
|
||||||
|
"existing_tags": self.state.tags,
|
||||||
|
"metadata": self.payload.get("remote_metadata"),
|
||||||
|
"mpv_metadata": self.state.mpv_metadata,
|
||||||
|
"options": self.remote_options,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
remote_result = metadata.resolve_remote_metadata(remote_payload)
|
||||||
|
except Exception as exc: # pragma: no cover - surfaced to the caller
|
||||||
|
self.state.warnings.append(str(exc))
|
||||||
|
remote_result = None
|
||||||
|
if remote_result:
|
||||||
|
tags = remote_result.get("tags") or []
|
||||||
|
self._merge_tags(tags)
|
||||||
|
self.state.remote_metadata = remote_result.get("metadata")
|
||||||
|
self.state.title = remote_result.get("title") or self.state.title
|
||||||
|
self.state.duration = remote_result.get("duration") or self.state.duration
|
||||||
|
self.state.source_url = remote_result.get("source_url") or self.state.source_url
|
||||||
|
warnings = remote_result.get("warnings") or []
|
||||||
|
if warnings:
|
||||||
|
self.state.warnings.extend(warnings)
|
||||||
|
if self.hydrus_settings.base_url and self.state.source_url:
|
||||||
|
self._populate_hydrus_by_url(self.state.source_url)
|
||||||
|
|
||||||
|
def _populate_hydrus_by_hash(self) -> None:
|
||||||
|
hash_hex = self.state.hash or self._extract_hash_from_path(self.state.path)
|
||||||
|
if hash_hex and not _looks_like_hash(hash_hex):
|
||||||
|
hash_hex = None
|
||||||
|
if not hash_hex:
|
||||||
|
return
|
||||||
|
self.state.hash = hash_hex
|
||||||
|
if not self.hydrus_settings.base_url:
|
||||||
|
return
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"api_url": self.hydrus_settings.base_url,
|
||||||
|
"access_key": self.hydrus_settings.access_key or "",
|
||||||
|
"options": self.hydrus_settings.as_metadata_options(),
|
||||||
|
"hash": hash_hex,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
result = metadata.fetch_hydrus_metadata(payload)
|
||||||
|
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||||
|
self.state.warnings.append(str(exc))
|
||||||
|
return
|
||||||
|
self._apply_hydrus_result(result)
|
||||||
|
# Enrich relationships using the dedicated Hydrus endpoint (robust GET)
|
||||||
|
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||||
|
self._enrich_relationships_from_api(self.state.hash)
|
||||||
|
|
||||||
|
def _populate_hydrus_by_url(self, url: str) -> None:
|
||||||
|
if not self.hydrus_settings.base_url:
|
||||||
|
return
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"api_url": self.hydrus_settings.base_url,
|
||||||
|
"access_key": self.hydrus_settings.access_key or "",
|
||||||
|
"options": self.hydrus_settings.as_metadata_options(),
|
||||||
|
"url": url,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
result = metadata.fetch_hydrus_metadata_by_url(payload)
|
||||||
|
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||||
|
self.state.warnings.append(str(exc))
|
||||||
|
return
|
||||||
|
if result.get("error") == "not_found":
|
||||||
|
self.state.warnings.extend(result.get("warnings") or [])
|
||||||
|
return
|
||||||
|
self._apply_hydrus_result(result)
|
||||||
|
self.state.type = "hydrus"
|
||||||
|
matched_url = result.get("matched_url") or result.get("url")
|
||||||
|
if matched_url and matched_url not in self.state.known_urls:
|
||||||
|
self.state.known_urls.append(matched_url)
|
||||||
|
# Enrich relationships once we know the hash
|
||||||
|
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||||
|
self._enrich_relationships_from_api(self.state.hash)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# state modification helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_hydrus_result(self, result: Dict[str, Any]) -> None:
|
||||||
|
metadata_payload = result.get("metadata")
|
||||||
|
if isinstance(metadata_payload, dict):
|
||||||
|
# Process mime into type for Lua
|
||||||
|
mime = metadata_payload.get("mime")
|
||||||
|
if isinstance(mime, str):
|
||||||
|
if mime.startswith("video/"):
|
||||||
|
metadata_payload["type"] = "video"
|
||||||
|
elif mime.startswith("audio/"):
|
||||||
|
metadata_payload["type"] = "audio"
|
||||||
|
elif mime.startswith("image/"):
|
||||||
|
metadata_payload["type"] = "image"
|
||||||
|
else:
|
||||||
|
metadata_payload["type"] = "other"
|
||||||
|
self.state.metadata = metadata_payload
|
||||||
|
# Do NOT overwrite MPVfile.type with metadata.type
|
||||||
|
self._merge_known_urls(metadata_payload.get("known_urls") or metadata_payload.get("known_urls_set"))
|
||||||
|
source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url")
|
||||||
|
if source_url and not self.state.source_url:
|
||||||
|
self.state.source_url = self._normalise_url(source_url)
|
||||||
|
# If file_relationships are embedded in metadata, capture as relationships when missing
|
||||||
|
if self.state.relationships is None:
|
||||||
|
embedded = metadata_payload.get("file_relationships")
|
||||||
|
if isinstance(embedded, dict) and embedded:
|
||||||
|
self.state.relationships = embedded
|
||||||
|
tags = result.get("tags") or []
|
||||||
|
self._merge_tags(tags)
|
||||||
|
hash_value = result.get("hash") or result.get("matched_hash")
|
||||||
|
if isinstance(hash_value, str) and _looks_like_hash(hash_value):
|
||||||
|
self.state.hash = hash_value.lower()
|
||||||
|
self.state.tag_service_key = result.get("tag_service_key") or self.state.tag_service_key
|
||||||
|
self.state.duration = result.get("duration") or self.state.duration
|
||||||
|
self.state.filesize_mb = result.get("filesize_mb") or self.state.filesize_mb
|
||||||
|
self.state.is_video = bool(result.get("is_video") or self.state.is_video)
|
||||||
|
self.state.is_audio = bool(result.get("is_audio") or self.state.is_audio)
|
||||||
|
if result.get("is_deleted") is not None:
|
||||||
|
self.state.is_deleted = bool(result.get("is_deleted"))
|
||||||
|
if result.get("is_local") is not None:
|
||||||
|
self.state.is_local = bool(result.get("is_local"))
|
||||||
|
if result.get("has_current_file_service") is not None:
|
||||||
|
self.state.has_current_file_service = bool(result.get("has_current_file_service"))
|
||||||
|
# Consolidate relationships from explicit result or embedded metadata
|
||||||
|
relationships_obj: Optional[Dict[str, Any]] = None
|
||||||
|
if isinstance(result.get("relationships"), dict):
|
||||||
|
relationships_obj = result["relationships"]
|
||||||
|
self.state.relationships = relationships_obj
|
||||||
|
elif isinstance(self.state.relationships, dict):
|
||||||
|
relationships_obj = self.state.relationships
|
||||||
|
|
||||||
|
# Helper to flatten any hashes from the relationships object
|
||||||
|
def _collect_hashes(obj: Any, acc: set[str]) -> None:
|
||||||
|
if obj is None:
|
||||||
|
return
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
for v in obj.values():
|
||||||
|
_collect_hashes(v, acc)
|
||||||
|
elif isinstance(obj, (list, tuple, set)):
|
||||||
|
for v in obj:
|
||||||
|
_collect_hashes(v, acc)
|
||||||
|
elif isinstance(obj, str) and _looks_like_hash(obj):
|
||||||
|
acc.add(obj.lower())
|
||||||
|
|
||||||
|
# Derive king and alts robustly from available data
|
||||||
|
king: Optional[str] = None
|
||||||
|
alts: list[str] = []
|
||||||
|
|
||||||
|
# 1) Try direct king fields on relationships object
|
||||||
|
rels = relationships_obj or {}
|
||||||
|
if isinstance(rels, dict):
|
||||||
|
# Common variants
|
||||||
|
for key in ("king", "king_hash", "duplicate_king", "best", "best_hash"):
|
||||||
|
val = rels.get(key)
|
||||||
|
if isinstance(val, str) and _looks_like_hash(val):
|
||||||
|
king = val.lower()
|
||||||
|
break
|
||||||
|
if isinstance(val, list):
|
||||||
|
for h in val:
|
||||||
|
if isinstance(h, str) and _looks_like_hash(h):
|
||||||
|
king = h.lower()
|
||||||
|
break
|
||||||
|
if king:
|
||||||
|
break
|
||||||
|
# 2) Extract alternates from known fields: numeric "3" (clips), or textual synonyms
|
||||||
|
for alt_key in ("3", "alternates", "alts", "clips"):
|
||||||
|
val = rels.get(alt_key)
|
||||||
|
if isinstance(val, list):
|
||||||
|
for h in val:
|
||||||
|
if isinstance(h, str) and _looks_like_hash(h):
|
||||||
|
h_low = h.lower()
|
||||||
|
if not king or h_low != king:
|
||||||
|
alts.append(h_low)
|
||||||
|
# some APIs might nest
|
||||||
|
elif isinstance(val, dict):
|
||||||
|
tmp: set[str] = set()
|
||||||
|
_collect_hashes(val, tmp)
|
||||||
|
for h in sorted(tmp):
|
||||||
|
if not king or h != king:
|
||||||
|
alts.append(h)
|
||||||
|
|
||||||
|
# 3) Use relationship_metadata keys as additional alternates and king hint
|
||||||
|
rel_meta = result.get("relationship_metadata")
|
||||||
|
if isinstance(rel_meta, dict):
|
||||||
|
# prefer king candidate with no clip_time if not set
|
||||||
|
if not king:
|
||||||
|
for h, meta in rel_meta.items():
|
||||||
|
if isinstance(h, str) and _looks_like_hash(h) and isinstance(meta, dict):
|
||||||
|
if not meta.get("clip_time"):
|
||||||
|
king = h.lower()
|
||||||
|
break
|
||||||
|
for h in rel_meta.keys():
|
||||||
|
if isinstance(h, str) and _looks_like_hash(h):
|
||||||
|
h_low = h.lower()
|
||||||
|
if not king or h_low != king:
|
||||||
|
alts.append(h_low)
|
||||||
|
|
||||||
|
# 4) As a last resort, flatten all relationship hashes
|
||||||
|
if not alts and relationships_obj:
|
||||||
|
tmp: set[str] = set()
|
||||||
|
_collect_hashes(relationships_obj, tmp)
|
||||||
|
for h in sorted(tmp):
|
||||||
|
if not king or h != king:
|
||||||
|
alts.append(h)
|
||||||
|
|
||||||
|
# 5) Include current file when appropriate
|
||||||
|
if self.state.hash and (not king or self.state.hash != king) and self.state.hash not in alts:
|
||||||
|
alts.append(self.state.hash)
|
||||||
|
|
||||||
|
# 6) Sort alternates by clip start time when available
|
||||||
|
rel_meta_all = result.get("relationship_metadata") if isinstance(result.get("relationship_metadata"), dict) else {}
|
||||||
|
def _clip_start_for(h: str) -> float:
|
||||||
|
meta = rel_meta_all.get(h) if isinstance(rel_meta_all, dict) else None
|
||||||
|
clip = meta.get("clip_time") if isinstance(meta, dict) else None
|
||||||
|
if isinstance(clip, str):
|
||||||
|
m = re.match(r"^(\d+)-(\d+)$", clip)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
return float(m.group(1))
|
||||||
|
except Exception:
|
||||||
|
return float("inf")
|
||||||
|
return float("inf")
|
||||||
|
|
||||||
|
if alts:
|
||||||
|
# de-duplicate while preserving earliest clip time ordering
|
||||||
|
seen: set[str] = set()
|
||||||
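# note: set.add() returns None, so "not seen.add(h)" keeps only the first
# occurrence of each hash while sorted() orders the alternates by clip start time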
|
alts = [h for h in sorted(alts, key=_clip_start_for) if (h not in seen and not seen.add(h))]
|
||||||
|
|
||||||
|
self.state.king = king
|
||||||
|
self.state.alts = alts
|
||||||
|
if isinstance(result.get("relationship_metadata"), dict):
|
||||||
|
self.state.relationship_metadata = result["relationship_metadata"]
|
||||||
|
self.state.title = result.get("title") or self.state.title
|
||||||
|
self.state.clip_time = result.get("clip_time") or self.state.clip_time
|
||||||
|
if result.get("swap_recommended"):
|
||||||
|
self.state.swap_recommended = True
|
||||||
|
warnings = result.get("warnings") or []
|
||||||
|
if warnings:
|
||||||
|
self.state.warnings.extend(warnings)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# relationships enrichment (Hydrus endpoint + alt metadata)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _enrich_relationships_from_api(self, file_hash: str) -> None:
|
||||||
|
"""Fetch relationships for the given hash and enrich state's king/alts and alt metadata.
|
||||||
|
|
||||||
|
- Uses GET /manage_file_relationships/get_file_relationships?hash=...
|
||||||
|
- If alts exist, batch-fetch their metadata via GET /get_files/file_metadata?hashes=[...]
|
||||||
|
- Extracts title, duration, size, tags (cleaned: title: kept with namespace, others stripped)
|
||||||
|
"""
|
||||||
|
base_url = self.hydrus_settings.base_url or ""
|
||||||
|
access_key = self.hydrus_settings.access_key or ""
|
||||||
|
if not base_url:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
client = HydrusClient(base_url, access_key, timeout=self.hydrus_settings.timeout)
|
||||||
|
except Exception as exc: # pragma: no cover - construction should rarely fail
|
||||||
|
self.state.warnings.append(f"Hydrus client init failed: {exc}")
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
rel_resp = client.get_file_relationships(file_hash)
|
||||||
|
except HydrusRequestError as hre: # pragma: no cover - surfaced but non-fatal
|
||||||
|
self.state.warnings.append(f"relationships api: {hre}")
|
||||||
|
return
|
||||||
|
except Exception as exc: # pragma: no cover
|
||||||
|
self.state.warnings.append(f"relationships api: {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
rel_map = rel_resp.get("file_relationships") or {}
|
||||||
|
rel_obj = None
|
||||||
|
if isinstance(rel_map, dict):
|
||||||
|
rel_obj = rel_map.get(file_hash) or next((v for v in rel_map.values() if isinstance(v, dict)), None)
|
||||||
|
if isinstance(rel_obj, dict):
|
||||||
|
# Preserve the full relationships object
|
||||||
|
self.state.relationships = rel_obj
|
||||||
|
# Update king and alts from canonical fields
|
||||||
|
king = rel_obj.get("king")
|
||||||
|
alts = rel_obj.get("3") or []
|
||||||
|
if isinstance(king, str) and _looks_like_hash(king):
|
||||||
|
self.state.king = king.lower()
|
||||||
|
if isinstance(alts, list):
|
||||||
|
self.state.alts = [h.lower() for h in alts if isinstance(h, str) and _looks_like_hash(h)]
|
||||||
|
|
||||||
|
# Fetch alt metadata if we have alts
|
||||||
|
if not self.state.alts:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
meta_resp = client.fetch_file_metadata(
|
||||||
|
hashes=self.state.alts,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_file_urls=False,
|
||||||
|
include_mime=False,
|
||||||
|
)
|
||||||
|
except HydrusRequestError as hre: # pragma: no cover
|
||||||
|
self.state.warnings.append(f"metadata api: {hre}")
|
||||||
|
return
|
||||||
|
except Exception as exc: # pragma: no cover
|
||||||
|
self.state.warnings.append(f"metadata api: {exc}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if not isinstance(meta_resp, dict):
|
||||||
|
return
|
||||||
|
entries = meta_resp.get("metadata") or []
|
||||||
|
if not isinstance(entries, list):
|
||||||
|
return
|
||||||
|
|
||||||
|
def _extract_tags(meta: Dict[str, Any]) -> list[str]:
|
||||||
|
tags: list[str] = []
|
||||||
|
tag_root = meta.get("tags") or meta.get("service_keys_to_statuses_to_tags") or {}
|
||||||
|
if isinstance(tag_root, dict):
|
||||||
|
for service_dict in tag_root.values():
|
||||||
|
if not isinstance(service_dict, dict):
|
||||||
|
continue
|
||||||
|
# Prefer storage_tags but fall back to any list values under known keys
|
||||||
|
storage = service_dict.get("storage_tags")
|
||||||
|
if isinstance(storage, dict):
|
||||||
|
for vals in storage.values():
|
||||||
|
if isinstance(vals, list):
|
||||||
|
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||||
|
else:
|
||||||
|
# fall back: inspect lists directly under service_dict
|
||||||
|
for vals in service_dict.values():
|
||||||
|
if isinstance(vals, list):
|
||||||
|
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||||
|
return tags
|
||||||
|
|
||||||
|
def _clean_tags_and_title(all_tags: list[str]) -> tuple[Optional[str], list[str]]:
|
||||||
|
title_val: Optional[str] = None
|
||||||
|
cleaned: list[str] = []
|
||||||
|
for tag in all_tags:
|
||||||
|
if not isinstance(tag, str):
|
||||||
|
continue
|
||||||
|
if tag.startswith("title:"):
|
||||||
|
if title_val is None:
|
||||||
|
title_val = tag.split(":", 1)[1]
|
||||||
|
cleaned.append(tag) # keep namespaced title
|
||||||
|
else:
|
||||||
|
if ":" in tag:
|
||||||
|
cleaned.append(tag.split(":", 1)[1])
|
||||||
|
else:
|
||||||
|
cleaned.append(tag)
|
||||||
|
return title_val, cleaned
|
||||||
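# Worked example of the cleaning above (values are illustrative):
#   _clean_tags_and_title(["title:My Clip", "creator:someone", "plain tag"])
#   -> ("My Clip", ["title:My Clip", "someone", "plain tag"])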
|
|
||||||
|
for meta in entries:
|
||||||
|
if not isinstance(meta, dict):
|
||||||
|
continue
|
||||||
|
h = meta.get("hash")
|
||||||
|
if not (isinstance(h, str) and _looks_like_hash(h)):
|
||||||
|
continue
|
||||||
|
tags_all = _extract_tags(meta)
|
||||||
|
title_val, tags_clean = _clean_tags_and_title(tags_all)
|
||||||
|
alt_info = {
|
||||||
|
"title": title_val,
|
||||||
|
"duration": meta.get("duration"),
|
||||||
|
"size": meta.get("size"),
|
||||||
|
"tags": tags_clean,
|
||||||
|
}
|
||||||
|
self.state.relationship_metadata[h.lower()] = alt_info
|
||||||
|
|
||||||
|
def _merge_tags(self, tags: Sequence[Any]) -> None:
|
||||||
|
incoming = _normalise_string_list(tags)
|
||||||
|
if not incoming:
|
||||||
|
return
|
||||||
|
combined = list(self.state.tags or []) + incoming
|
||||||
|
self.state.tags = unique_preserve_order(combined)
|
||||||
|
for tag in incoming:
|
||||||
|
if tag not in self.state.original_tags:
|
||||||
|
self.state.original_tags[tag] = tag
|
||||||
|
|
||||||
|
def _merge_known_urls(self, urls: Optional[Iterable[Any]]) -> None:
|
||||||
|
if not urls:
|
||||||
|
return
|
||||||
|
combined = list(self.state.known_urls or []) + _normalise_string_list(urls)
|
||||||
|
self.state.known_urls = unique_preserve_order(combined)
|
||||||
|
|
||||||
|
def _load_sidecar_tags(self, local_path: str) -> None:
|
||||||
|
try:
|
||||||
|
media_path = Path(local_path)
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
if not media_path.exists():
|
||||||
|
return
|
||||||
|
candidates = [media_path.with_suffix(".tags"), media_path.with_suffix(".tags.txt")]
|
||||||
|
for candidate in candidates:
|
||||||
|
if candidate.exists():
|
||||||
|
hash_value, tags, known = self._read_sidecar(candidate)
|
||||||
|
if hash_value and not self.state.hash and _looks_like_hash(hash_value):
|
||||||
|
self.state.hash = hash_value.lower()
|
||||||
|
self._merge_tags(tags)
|
||||||
|
self._merge_known_urls(known)
|
||||||
|
break
|
||||||
|
|
||||||
|
def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
|
||||||
|
try:
|
||||||
|
raw = sidecar_path.read_text(encoding="utf-8", errors="ignore")
|
||||||
|
except OSError:
|
||||||
|
return None, [], []
|
||||||
|
hash_value: Optional[str] = None
|
||||||
|
tags: List[str] = []
|
||||||
|
known_urls: List[str] = []
|
||||||
|
for line in raw.splitlines():
|
||||||
|
trimmed = line.strip()
|
||||||
|
if not trimmed:
|
||||||
|
continue
|
||||||
|
lowered = trimmed.lower()
|
||||||
|
if lowered.startswith("hash:"):
|
||||||
|
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||||
|
if candidate:
|
||||||
|
hash_value = candidate
|
||||||
|
elif lowered.startswith("known_url:") or lowered.startswith("url:"):
|
||||||
|
candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
|
||||||
|
if candidate:
|
||||||
|
known_urls.append(candidate)
|
||||||
|
else:
|
||||||
|
tags.append(trimmed)
|
||||||
|
return hash_value, tags, known_urls
|
||||||
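# Illustrative sidecar layout accepted by this parser (values are made up):
#   hash: <64-character hex digest>
#   known_url: https://example.com/post/123
#   creator:someone
#   some plain tag
# Any line that is not a "hash:" or "url:"/"known_url:" entry is kept as a tag.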
|
|
||||||
|
def _compute_local_hash(self, local_path: str) -> None:
|
||||||
|
try:
|
||||||
|
digest = sha256_file(Path(local_path))
|
||||||
|
except OSError as exc:
|
||||||
|
self.state.warnings.append(f"sha256 failed: {exc}")
|
||||||
|
return
|
||||||
|
self.state.hash = digest.lower()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# finalisation helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _finalise(self) -> None:
|
||||||
|
if self.state.tags:
|
||||||
|
self.state.tags = unique_preserve_order(self.state.tags)
|
||||||
|
if self.state.known_urls:
|
||||||
|
self.state.known_urls = unique_preserve_order(self.state.known_urls)
|
||||||
|
# Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type
|
||||||
|
if not self.state.title:
|
||||||
|
if self.state.metadata.get("title"):
|
||||||
|
self.state.title = str(self.state.metadata["title"]).strip()
|
||||||
|
elif self.state.filename:
|
||||||
|
self.state.title = self.state.filename
|
||||||
|
if self.state.hash and not _looks_like_hash(self.state.hash):
|
||||||
|
self.state.hash = None
|
||||||
|
if self.state.relationship_metadata is None:
|
||||||
|
self.state.relationship_metadata = {}
|
||||||
|
if self.state.relationships is not None and not isinstance(self.state.relationships, dict):
|
||||||
|
self.state.relationships = None
|
||||||
|
if self.state.original_tags is None:
|
||||||
|
self.state.original_tags = {}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# util helpers
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalise_url(value: Any) -> Optional[str]:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
text = str(value).strip()
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
return text
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _resolve_local_path(path: Optional[str]) -> Optional[str]:
|
||||||
|
if not path:
|
||||||
|
return None
|
||||||
|
parsed = urlparse(path)
|
||||||
|
if parsed.scheme == "file":
|
||||||
|
decoded = MPVFileBuilder._decode_file_url(path)
|
||||||
|
return decoded
|
||||||
|
return path
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _decode_file_url(value: str) -> Optional[str]:
|
||||||
|
parsed = urlparse(value)
|
||||||
|
if parsed.scheme != "file":
|
||||||
|
return None
|
||||||
|
netloc = parsed.netloc or ""
|
||||||
|
path = unquote(parsed.path or "")
|
||||||
|
if netloc:
|
||||||
|
path = f"//{netloc}{path}"
|
||||||
|
if os.name == "nt" and path.startswith("/") and re.match(r"/[A-Za-z]:", path):
|
||||||
|
path = path[1:]
|
||||||
|
path = path.replace("/", os.sep)
|
||||||
|
return path
|
||||||
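# Rough examples of the conversion above (illustrative, not exhaustive):
#   _decode_file_url("file:///C:/media/clip%20one.mp4") -> "C:\media\clip one.mp4" on Windows
#   _decode_file_url("file:///home/user/clip.mp4")      -> "/home/user/clip.mp4" on POSIX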
|
|
||||||
|
def _looks_like_hydrus_url(self, url: str) -> bool:
|
||||||
|
if not url:
|
||||||
|
return False
|
||||||
|
if url.startswith("hydrus://"):
|
||||||
|
return True
|
||||||
|
if "Hydrus-Client-API-Access-Key=" in url:
|
||||||
|
return True
|
||||||
|
base = self.hydrus_settings.base_url
|
||||||
|
if base and url.startswith(base) and "/get_files/" in url:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_hash_from_path(path: Optional[str]) -> Optional[str]:
|
||||||
|
if not path:
|
||||||
|
return None
|
||||||
|
parsed = urlparse(path)
|
||||||
|
query = parse_qs(parsed.query)
|
||||||
|
if "hash" in query and query["hash"]:
|
||||||
|
candidate = query["hash"][0].strip()
|
||||||
|
if candidate:
|
||||||
|
return candidate.lower()
|
||||||
|
match = re.search(r"hash=([0-9a-fA-F]{64})", path)
|
||||||
|
if match:
|
||||||
|
return match.group(1).lower()
|
||||||
|
return None
|
||||||
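# Illustrative behaviour (the digest is a placeholder, not a real hash):
#   ".../get_files/file?hash=<64-hex-digest>"  -> "<64-hex-digest>" lower-cased
#   a plain local path with no hash parameter  -> None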
|
|
||||||
|
|
||||||
|
def build_mpv_file_state(payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||||
|
builder = MPVFileBuilder(payload or {}, config or {})
|
||||||
|
return builder.build()
|
||||||
143
helper/progress.py
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Text-based progress bar utilities for consistent display across all downloads."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log, debug
|
||||||
|
|
||||||
|
|
||||||
|
def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
|
||||||
|
"""Create a text-based progress bar.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current: Current progress (bytes/items)
|
||||||
|
total: Total to complete (bytes/items)
|
||||||
|
width: Width of the bar in characters (default 40)
|
||||||
|
label: Optional label prefix
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted progress bar string
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
format_progress_bar(50, 100)
|
||||||
|
# Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"
|
||||||
|
|
||||||
|
format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
|
||||||
|
# Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
|
||||||
|
"""
|
||||||
|
if total <= 0:
|
||||||
|
percentage = 0
|
||||||
|
filled = 0
|
||||||
|
else:
|
||||||
|
percentage = (current / total) * 100
|
||||||
|
filled = int((current / total) * width)
|
||||||
|
|
||||||
|
# Create bar: filled blocks + empty blocks
|
||||||
|
bar = "█" * filled + "░" * (width - filled)
|
||||||
|
|
||||||
|
# Format percentage
|
||||||
|
pct_str = f"{percentage:.1f}%"
|
||||||
|
|
||||||
|
# Build result
|
||||||
|
if label:
|
||||||
|
result = f"{label}: [{bar}] {pct_str}"
|
||||||
|
else:
|
||||||
|
result = f"[{bar}] {pct_str}"
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def format_size(bytes_val: float) -> str:
|
||||||
|
"""Format bytes to human-readable size.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
format_size(1024) -> "1.00 KB"
|
||||||
|
format_size(1024*1024) -> "1.00 MB"
|
||||||
|
format_size(1024*1024*1024) -> "1.00 GB"
|
||||||
|
"""
|
||||||
|
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
|
||||||
|
if bytes_val < 1024:
|
||||||
|
return f"{bytes_val:.2f} {unit}"
|
||||||
|
bytes_val /= 1024
|
||||||
|
return f"{bytes_val:.2f} PB"
|
||||||
|
|
||||||
|
|
||||||
|
def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
|
||||||
|
"""Format download status with progress bar and details.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: Name of file being downloaded
|
||||||
|
current: Current bytes downloaded
|
||||||
|
total: Total file size
|
||||||
|
speed: Download speed in bytes/sec
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted status line
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
format_download_status("movie.mkv", 512*1024*1024, 2*1024*1024*1024, 10*1024*1024)
|
||||||
|
# Returns: "movie.mkv: [████████████░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0% (512.00 MB / 2.00 GB @ 10.00 MB/s)"
|
||||||
|
"""
|
||||||
|
bar = format_progress_bar(current, total, width=30, label=filename)  # include the filename so the output matches the docstring example
|
||||||
|
size_current = format_size(current)
|
||||||
|
size_total = format_size(total)
|
||||||
|
|
||||||
|
if speed > 0:
|
||||||
|
speed_str = f" @ {format_size(speed)}/s"
|
||||||
|
else:
|
||||||
|
speed_str = ""
|
||||||
|
|
||||||
|
return f"{bar} ({size_current} / {size_total}{speed_str})"
|
||||||
|
|
||||||
|
|
||||||
|
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
|
||||||
|
"""Print download progress to stderr (doesn't interfere with piped output).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: File being downloaded
|
||||||
|
current: Current bytes
|
||||||
|
total: Total bytes
|
||||||
|
speed: Speed in bytes/sec
|
||||||
|
end: Line ending (default "\r" for overwriting, use "\n" for final)
|
||||||
|
"""
|
||||||
|
status = format_download_status(filename, current, total, speed)
|
||||||
|
debug(status, end=end, flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
|
||||||
|
"""Print final progress line (100%) with time elapsed.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: File that was downloaded
|
||||||
|
total: Total size
|
||||||
|
elapsed: Time elapsed in seconds
|
||||||
|
"""
|
||||||
|
bar = format_progress_bar(total, total, width=30)
|
||||||
|
size_str = format_size(total)
|
||||||
|
|
||||||
|
# Format elapsed time
|
||||||
|
if elapsed < 60:
|
||||||
|
time_str = f"{elapsed:.1f}s"
|
||||||
|
elif elapsed < 3600:
|
||||||
|
minutes = elapsed / 60
|
||||||
|
time_str = f"{minutes:.1f}m"
|
||||||
|
else:
|
||||||
|
hours = elapsed / 3600
|
||||||
|
time_str = f"{hours:.2f}h"
|
||||||
|
|
||||||
|
debug(f"{bar} ({size_str}) - {time_str}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Demo
|
||||||
|
import time
|
||||||
|
|
||||||
|
log("Progress Bar Demo:", file=sys.stderr)
|
||||||
|
|
||||||
|
# Demo 1: Simple progress
|
||||||
|
for i in range(101):
|
||||||
|
print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
|
||||||
|
time.sleep(0.02)
|
||||||
|
|
||||||
|
print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
|
||||||
|
log()
|
||||||
159
helper/query_parser.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""Dynamic query parser for filtering and field extraction.
|
||||||
|
|
||||||
|
Supports query syntax like:
|
||||||
|
- isbn:0557677203
|
||||||
|
- author:"Albert Pike"
|
||||||
|
- title:"Morals and Dogma"
|
||||||
|
- year:2010
|
||||||
|
- isbn:0557677203 author:"Albert Pike"
|
||||||
|
- Mixed with free text: "Morals" isbn:0557677203
|
||||||
|
|
||||||
|
This allows flexible query strings that can be parsed by any search provider
|
||||||
|
to extract specific fields for filtering and searching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Dict, List, Tuple, Optional, Any
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def parse_query(query: str) -> Dict[str, Any]:
|
||||||
|
"""Parse a query string into field:value pairs and free text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with:
|
||||||
|
- 'fields': Dict[field_name, field_value] for structured fields
|
||||||
|
- 'text': str with remaining free text
|
||||||
|
- 'raw': str original query
|
||||||
|
"""
|
||||||
|
result = {
|
||||||
|
'fields': {},
|
||||||
|
'text': '',
|
||||||
|
'raw': query,
|
||||||
|
}
|
||||||
|
|
||||||
|
if not query or not query.strip():
|
||||||
|
return result
|
||||||
|
|
||||||
|
query = query.strip()
|
||||||
|
remaining_parts = []
|
||||||
|
|
||||||
|
# Pattern to match: field:value or field:"quoted value"
|
||||||
|
# Matches: word: followed by either quoted string or unquoted word
|
||||||
|
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
|
||||||
|
|
||||||
|
pos = 0
|
||||||
|
for match in re.finditer(pattern, query):
|
||||||
|
# Add any text before this match
|
||||||
|
if match.start() > pos:
|
||||||
|
before_text = query[pos:match.start()].strip()
|
||||||
|
if before_text:
|
||||||
|
remaining_parts.append(before_text)
|
||||||
|
|
||||||
|
field_name = match.group(1).lower()
|
||||||
|
field_value = match.group(2) if match.group(2) is not None else match.group(3)
|
||||||
|
|
||||||
|
result['fields'][field_name] = field_value
|
||||||
|
pos = match.end()
|
||||||
|
|
||||||
|
# Add any remaining text after last match
|
||||||
|
if pos < len(query):
|
||||||
|
remaining_text = query[pos:].strip()
|
||||||
|
if remaining_text:
|
||||||
|
remaining_parts.append(remaining_text)
|
||||||
|
|
||||||
|
result['text'] = ' '.join(remaining_parts)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
|
||||||
|
"""Get a field value from parsed query, with optional default.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parsed_query: Result from parse_query()
|
||||||
|
field_name: Field name to look up (case-insensitive)
|
||||||
|
default: Default value if field not found
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Field value or default
|
||||||
|
"""
|
||||||
|
return parsed_query.get('fields', {}).get(field_name.lower(), default)
|
||||||
|
|
||||||
|
|
||||||
|
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
|
||||||
|
"""Check if a field exists in parsed query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parsed_query: Result from parse_query()
|
||||||
|
field_name: Field name to check (case-insensitive)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if field exists
|
||||||
|
"""
|
||||||
|
return field_name.lower() in parsed_query.get('fields', {})
|
||||||
|
|
||||||
|
|
||||||
|
def get_free_text(parsed_query: Dict[str, Any]) -> str:
|
||||||
|
"""Get the free text portion of a parsed query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parsed_query: Result from parse_query()
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Free text or empty string
|
||||||
|
"""
|
||||||
|
return parsed_query.get('text', '')
|
||||||
|
|
||||||
|
|
||||||
|
def build_query_for_provider(
|
||||||
|
parsed_query: Dict[str, Any],
|
||||||
|
provider: str,
|
||||||
|
extraction_map: Optional[Dict[str, str]] = None
|
||||||
|
) -> Tuple[str, Dict[str, str]]:
|
||||||
|
"""Build a search query and filters dict for a specific provider.
|
||||||
|
|
||||||
|
Different providers have different search syntax. This function
|
||||||
|
extracts the appropriate fields for each provider.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parsed_query: Result from parse_query()
|
||||||
|
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
|
||||||
|
extraction_map: Optional mapping of field names to provider-specific names
|
||||||
|
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (search_query: str, extracted_fields: Dict[field, value])
|
||||||
|
"""
|
||||||
|
extraction_map = extraction_map or {}
|
||||||
|
extracted = {}
|
||||||
|
free_text = get_free_text(parsed_query)
|
||||||
|
|
||||||
|
# Extract fields based on map
|
||||||
|
for field_name, provider_key in extraction_map.items():
|
||||||
|
if has_field(parsed_query, field_name):
|
||||||
|
extracted[provider_key] = get_field(parsed_query, field_name)
|
||||||
|
|
||||||
|
# If provider-specific extraction needed, providers can implement it
|
||||||
|
# For now, return the free text as query
|
||||||
|
return free_text, extracted
|
||||||
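# Sketch of a provider calling this helper (the extraction_map keys are illustrative):
#   parsed = parse_query('isbn:0557677203 author:"Albert Pike" Morals')
#   query, fields = build_query_for_provider(parsed, 'libgen', {'isbn': 'isbn', 'author': 'author'})
#   # query  -> 'Morals'
#   # fields -> {'isbn': '0557677203', 'author': 'Albert Pike'}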
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# Test cases
|
||||||
|
test_queries = [
|
||||||
|
'isbn:0557677203',
|
||||||
|
'isbn:0557677203 author:"Albert Pike"',
|
||||||
|
'Morals and Dogma isbn:0557677203',
|
||||||
|
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
|
||||||
|
'search term without fields',
|
||||||
|
'author:"John Smith" title:"A Book"',
|
||||||
|
]
|
||||||
|
|
||||||
|
for query in test_queries:
|
||||||
|
print(f"\nQuery: {query}")
|
||||||
|
parsed = parse_query(query)
|
||||||
|
print(f" Fields: {parsed['fields']}")
|
||||||
|
print(f" Text: {parsed['text']}")
|
||||||
1777
helper/search_provider.py
Normal file
File diff suppressed because it is too large
155
helper/tasks.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
"""Background task handling and IPC helpers for mpv integration."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import errno
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from typing import IO, Iterable
|
||||||
|
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
|
||||||
|
"""Connect to the mpv IPC server located at *path*."""
|
||||||
|
deadline = time.time() + timeout
|
||||||
|
if not path:
|
||||||
|
return None
|
||||||
|
if os.name == 'nt':
|
||||||
|
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
return open(path, 'r+b', buffering=0)
|
||||||
|
except FileNotFoundError:
|
||||||
|
if time.time() > deadline:
|
||||||
|
return None
|
||||||
|
time.sleep(0.05)
|
||||||
|
except OSError as exc: # Pipe busy
|
||||||
|
if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
|
||||||
|
raise
|
||||||
|
if time.time() > deadline:
|
||||||
|
return None
|
||||||
|
time.sleep(0.05)
|
||||||
|
else:
|
||||||
|
sock = socket.socket(socket.AF_UNIX)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
sock.connect(path)
|
||||||
|
return sock.makefile('r+b', buffering=0)
|
||||||
|
except FileNotFoundError:
|
||||||
|
if time.time() > deadline:
|
||||||
|
return None
|
||||||
|
time.sleep(0.05)
|
||||||
|
except OSError as exc:
|
||||||
|
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
|
||||||
|
raise
|
||||||
|
if time.time() > deadline:
|
||||||
|
return None
|
||||||
|
time.sleep(0.05)
|
||||||
|
def ipc_sender(ipc: IO[bytes] | None):
|
||||||
|
"""Create a helper function for sending script messages via IPC."""
|
||||||
|
if ipc is None:
|
||||||
|
def _noop(_event: str, _payload: dict) -> None:
|
||||||
|
return None
|
||||||
|
return _noop
|
||||||
|
lock = threading.Lock()
|
||||||
|
def _send(event: str, payload: dict) -> None:
|
||||||
|
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
|
||||||
|
encoded = message.encode('utf-8') + b'\n'
|
||||||
|
with lock:
|
||||||
|
try:
|
||||||
|
ipc.write(encoded)
|
||||||
|
ipc.flush()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return _send
|
||||||
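# Minimal usage sketch (the pipe/socket path is an assumption for illustration):
#   ipc = connect_ipc(r"\\.\pipe\mpv-ipc" if os.name == "nt" else "/tmp/mpv.sock", timeout=2.0)
#   send = ipc_sender(ipc)
#   send("downlow-task-event", {"id": "task-1", "event": "stdout", "line": "hello"})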
|
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
|
||||||
|
for raw in stream:
|
||||||
|
yield raw.rstrip('\r\n')
|
||||||
|
def _run_task(args, parser) -> int:
|
||||||
|
if not args.command:
|
||||||
|
parser.error('run-task requires a command to execute (use "--" before the command).')
|
||||||
|
env = os.environ.copy()
|
||||||
|
for entry in args.env:
|
||||||
|
key, sep, value = entry.partition('=')
|
||||||
|
if not sep:
|
||||||
|
parser.error(f'Invalid environment variable definition: {entry!r}')
|
||||||
|
env[key] = value
|
||||||
|
command = list(args.command)
|
||||||
|
if command and command[0] == '--':
|
||||||
|
command.pop(0)
|
||||||
|
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
|
||||||
|
if not command:
|
||||||
|
notifier('downlow-task-event', {
|
||||||
|
'id': args.task_id,
|
||||||
|
'event': 'error',
|
||||||
|
'message': 'No command provided after separator',
|
||||||
|
})
|
||||||
|
log('[downlow.py] No command provided for run-task', file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
if command and isinstance(command[0], str) and sys.executable:
|
||||||
|
first = command[0].lower()
|
||||||
|
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
|
||||||
|
command[0] = sys.executable
|
||||||
|
if os.environ.get('DOWNLOW_DEBUG'):
|
||||||
|
log(f"Launching command: {command}", file=sys.stderr)
|
||||||
|
notifier('downlow-task-event', {
|
||||||
|
'id': args.task_id,
|
||||||
|
'event': 'start',
|
||||||
|
'command': command,
|
||||||
|
'cwd': args.cwd or os.getcwd(),
|
||||||
|
})
|
||||||
|
try:
|
||||||
|
process = subprocess.Popen(
|
||||||
|
command,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
cwd=args.cwd or None,
|
||||||
|
env=env,
|
||||||
|
text=True,
|
||||||
|
bufsize=1,
|
||||||
|
universal_newlines=True,
|
||||||
|
)
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
notifier('downlow-task-event', {
|
||||||
|
'id': args.task_id,
|
||||||
|
'event': 'error',
|
||||||
|
'message': f'Executable not found: {exc.filename}',
|
||||||
|
})
|
||||||
|
log(f"{exc}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
stdout_lines: list[str] = []
|
||||||
|
stderr_lines: list[str] = []
|
||||||
|
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
|
||||||
|
for line in iter_stream(stream):
|
||||||
|
sink.append(line)
|
||||||
|
notifier('downlow-task-event', {
|
||||||
|
'id': args.task_id,
|
||||||
|
'event': label,
|
||||||
|
'line': line,
|
||||||
|
})
|
||||||
|
threads = []
|
||||||
|
if process.stdout:
|
||||||
|
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
|
||||||
|
t_out.start()
|
||||||
|
threads.append(t_out)
|
||||||
|
if process.stderr:
|
||||||
|
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
|
||||||
|
t_err.start()
|
||||||
|
threads.append(t_err)
|
||||||
|
return_code = process.wait()
|
||||||
|
for t in threads:
|
||||||
|
t.join(timeout=0.1)
|
||||||
|
notifier('downlow-task-event', {
|
||||||
|
'id': args.task_id,
|
||||||
|
'event': 'exit',
|
||||||
|
'returncode': return_code,
|
||||||
|
'success': return_code == 0,
|
||||||
|
})
|
||||||
|
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
|
||||||
|
if stdout_lines:
|
||||||
|
log('\n'.join(stdout_lines))
|
||||||
|
if stderr_lines:
|
||||||
|
log('\n'.join(stderr_lines), file=sys.stderr)
|
||||||
|
return return_code
|
||||||
706
helper/unified_book_downloader.py
Normal file
@@ -0,0 +1,706 @@
|
|||||||
|
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
|
||||||
|
|
||||||
|
This module provides a single interface for downloading books from multiple sources:
|
||||||
|
1. Try Archive.org direct download (if available)
|
||||||
|
2. Try Archive.org borrowing (if user has credentials)
|
||||||
|
3. Fallback to Libgen search by ISBN
|
||||||
|
4. Attempt Libgen download
|
||||||
|
|
||||||
|
All sources are integrated with metadata scraping and proper error handling.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
import requests
|
||||||
|
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from helper.logger import debug
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class UnifiedBookDownloader:
|
||||||
|
"""Unified interface for downloading books from multiple sources."""
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
|
||||||
|
"""Initialize the unified book downloader.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dict with credentials
|
||||||
|
output_dir: Default output directory
|
||||||
|
"""
|
||||||
|
self.config = config or {}
|
||||||
|
self.output_dir = output_dir
|
||||||
|
self.session = requests.Session()
|
||||||
|
|
||||||
|
# Import download functions from their modules
|
||||||
|
self._init_downloaders()
|
||||||
|
|
||||||
|
def _init_downloaders(self) -> None:
|
||||||
|
"""Initialize downloader functions from their modules."""
|
||||||
|
try:
|
||||||
|
from helper.archive_client import (
|
||||||
|
check_direct_download,
|
||||||
|
get_openlibrary_by_isbn,
|
||||||
|
loan
|
||||||
|
)
|
||||||
|
self.check_direct_download = check_direct_download
|
||||||
|
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
|
||||||
|
self.loan_func = loan
|
||||||
|
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
|
||||||
|
self.check_direct_download = None
|
||||||
|
self.get_openlibrary_by_isbn = None
|
||||||
|
self.loan_func = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from helper.libgen_service import (
|
||||||
|
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
|
||||||
|
download_from_mirror as _libgen_download,
|
||||||
|
search_libgen as _libgen_search,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _log_info(message: str) -> None:
|
||||||
|
debug(f"[UnifiedBookDownloader] {message}")
|
||||||
|
|
||||||
|
def _log_error(message: str) -> None:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] {message}")
|
||||||
|
|
||||||
|
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
|
||||||
|
query,
|
||||||
|
limit=limit,
|
||||||
|
log_info=_log_info,
|
||||||
|
log_error=_log_error,
|
||||||
|
)
|
||||||
|
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
|
||||||
|
mirror_url,
|
||||||
|
output_path,
|
||||||
|
log_info=_log_info,
|
||||||
|
log_error=_log_error,
|
||||||
|
)
|
||||||
|
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
|
||||||
|
self.search_libgen = None
|
||||||
|
self.download_from_mirror = None
|
||||||
|
|
||||||
|
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Get all available download options for a book.
|
||||||
|
|
||||||
|
Checks in priority order:
|
||||||
|
1. Archive.org direct download (public domain)
|
||||||
|
2. Archive.org borrowing (if credentials available and book is borrowable)
|
||||||
|
3. Libgen fallback (by ISBN)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with available download methods and metadata
|
||||||
|
"""
|
||||||
|
options = {
|
||||||
|
'book_title': book_data.get('title', 'Unknown'),
|
||||||
|
'book_author': book_data.get('author', 'Unknown'),
|
||||||
|
'isbn': book_data.get('isbn', ''),
|
||||||
|
'openlibrary_id': book_data.get('openlibrary_id', ''),
|
||||||
|
'methods': [], # Will be sorted by priority
|
||||||
|
'metadata': {}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
|
||||||
|
ol_id = book_data.get('openlibrary_id', '')
|
||||||
|
book_id = None
|
||||||
|
|
||||||
|
if ol_id.startswith('OL') and len(ol_id) > 2:
|
||||||
|
# Remove 'OL' prefix (keep everything after it including the suffix letter)
|
||||||
|
# The book_id is all digits after 'OL'
|
||||||
|
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
|
||||||
|
|
||||||
|
# PRIORITY 1: Check direct download (fastest, no auth needed)
|
||||||
|
if self.check_direct_download:
|
||||||
|
try:
|
||||||
|
can_download, pdf_url = self.check_direct_download(book_id)
|
||||||
|
if can_download:
|
||||||
|
options['methods'].append({
|
||||||
|
'type': 'archive.org_direct',
|
||||||
|
'label': 'Archive.org Direct Download',
|
||||||
|
'requires_auth': False,
|
||||||
|
'pdf_url': pdf_url,
|
||||||
|
'book_id': book_id,
|
||||||
|
'priority': 1 # Highest priority
|
||||||
|
})
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
|
||||||
|
|
||||||
|
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
|
||||||
|
# First verify the book is actually lendable via OpenLibrary API
|
||||||
|
if self._has_archive_credentials():
|
||||||
|
is_lendable, status = self._check_book_lendable_status(ol_id)
|
||||||
|
|
||||||
|
if is_lendable:
|
||||||
|
options['methods'].append({
|
||||||
|
'type': 'archive.org_borrow',
|
||||||
|
'label': 'Archive.org Borrow',
|
||||||
|
'requires_auth': True,
|
||||||
|
'book_id': book_id,
|
||||||
|
'priority': 2 # Second priority
|
||||||
|
})
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
|
||||||
|
else:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
|
||||||
|
|
||||||
|
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
|
||||||
|
isbn = book_data.get('isbn', '')
|
||||||
|
title = book_data.get('title', '')
|
||||||
|
author = book_data.get('author', '')
|
||||||
|
|
||||||
|
if self.search_libgen:
|
||||||
|
# Can use Libgen if we have ISBN OR title (or both)
|
||||||
|
if isbn or title:
|
||||||
|
options['methods'].append({
|
||||||
|
'type': 'libgen',
|
||||||
|
'label': 'Libgen Search & Download',
|
||||||
|
'requires_auth': False,
|
||||||
|
'isbn': isbn,
|
||||||
|
'title': title,
|
||||||
|
'author': author,
|
||||||
|
'priority': 3 # Third priority (fallback)
|
||||||
|
})
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
|
||||||
|
|
||||||
|
# Sort by priority (higher priority first)
|
||||||
|
options['methods'].sort(key=lambda x: x.get('priority', 999))
|
||||||
|
|
||||||
|
return options
|
||||||
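# Usage sketch (keys mirror the dict built above; the book data values are made up):
#   downloader = UnifiedBookDownloader(config=config, output_dir=str(Path.home() / "Downloads"))
#   opts = downloader.get_download_options({"openlibrary_id": "OL8513721M", "isbn": "0557677203", "title": "Morals and Dogma"})
#   for m in opts["methods"]:
#       print(m["priority"], m["type"], m["label"])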
|
|
||||||
|
def _has_archive_credentials(self) -> bool:
|
||||||
|
"""Check if Archive.org credentials are available."""
|
||||||
|
try:
|
||||||
|
from helper.archive_client import credential_openlibrary
|
||||||
|
email, password = credential_openlibrary(self.config)
|
||||||
|
return bool(email and password)
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
|
||||||
|
"""Check if a book is lendable via OpenLibrary API.
|
||||||
|
|
||||||
|
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
|
||||||
|
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_lendable: bool, status_reason: Optional[str])
|
||||||
|
"""
|
||||||
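# Response shape assumed by the parsing below (illustrative, inferred from these checks):
#   {"OLID:OL8513721M": {"items": [{"status": "lendable", ...}]}}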
|
try:
|
||||||
|
if not ol_id.startswith('OL'):
|
||||||
|
return False, "Invalid OpenLibrary ID format"
|
||||||
|
|
||||||
|
# If this is a Work ID (ends with W), we can't query Volumes API
|
||||||
|
# Work IDs are abstract umbrella records, not specific editions
|
||||||
|
if ol_id.endswith('W'):
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
|
||||||
|
return False, "Work ID not supported by Volumes API (not a specific edition)"
|
||||||
|
|
||||||
|
# If it ends with M, it's an Edition ID - proceed with query
|
||||||
|
if not ol_id.endswith('M'):
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
|
||||||
|
return False, "Invalid OpenLibrary ID type"
|
||||||
|
|
||||||
|
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
|
||||||
|
response = self.session.get(url, timeout=10)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
# Empty response means no records found
|
||||||
|
if not data:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
|
||||||
|
return False, "No availability data found"
|
||||||
|
|
||||||
|
# The response is wrapped in OLID key
|
||||||
|
olid_key = f"OLID:{ol_id}"
|
||||||
|
if olid_key not in data:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
|
||||||
|
return False, "No availability data found"
|
||||||
|
|
||||||
|
olid_data = data[olid_key]
|
||||||
|
|
||||||
|
# Check items array for lendable status
|
||||||
|
if 'items' in olid_data and olid_data['items'] and len(olid_data['items']) > 0:
|
||||||
|
items = olid_data['items']
|
||||||
|
|
||||||
|
# Check the first item for lending status
|
||||||
|
first_item = items[0]
|
||||||
|
|
||||||
|
# Handle both dict and string representations (PowerShell converts to string)
|
||||||
|
if isinstance(first_item, dict):
|
||||||
|
status = first_item.get('status', '')
|
||||||
|
else:
|
||||||
|
# String representation - check if 'lendable' is in it
|
||||||
|
status = str(first_item).lower()
|
||||||
|
|
||||||
|
is_lendable = 'lendable' in str(status).lower()
|
||||||
|
|
||||||
|
if is_lendable:
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
|
||||||
|
return True, "LENDABLE"
|
||||||
|
else:
|
||||||
|
status_str = str(status) if status else 'NOT_LENDABLE'  # status is already a plain string at this point
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
|
||||||
|
return False, status_str
|
||||||
|
else:
|
||||||
|
# No items array or empty
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
|
||||||
|
return False, "Not available for lending"
|
||||||
|
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
|
||||||
|
return False, "API timeout"
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
|
||||||
|
return False, f"API error"
|
||||||
|
|
||||||
|
|
||||||
|
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
|
||||||
|
"""Download a book using the specified method.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
method: Download method dict from get_download_options()
|
||||||
|
output_dir: Directory to save the book
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (success: bool, message: str)
|
||||||
|
"""
|
||||||
|
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
|
||||||
|
method_type = method.get('type', '')
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
if method_type == 'archive.org_direct':
|
||||||
|
return await self._download_archive_direct(method, output_dir)
|
||||||
|
|
||||||
|
elif method_type == 'archive.org_borrow':
|
||||||
|
return await self._download_archive_borrow(method, output_dir)
|
||||||
|
|
||||||
|
elif method_type == 'libgen':
|
||||||
|
return await self._download_libgen(method, output_dir)
|
||||||
|
|
||||||
|
else:
|
||||||
|
return False, f"Unknown download method: {method_type}"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
|
||||||
|
return False, f"Download failed: {str(e)}"
|
||||||
|
|
||||||
|
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||||
|
"""Download directly from Archive.org."""
|
||||||
|
try:
|
||||||
|
pdf_url = method.get('pdf_url', '')
|
||||||
|
book_id = method.get('book_id', '')
|
||||||
|
|
||||||
|
if not pdf_url:
|
||||||
|
return False, "No PDF URL available"
|
||||||
|
|
||||||
|
# Determine output filename
|
||||||
|
filename = f"{book_id}.pdf"
|
||||||
|
output_path = Path(output_dir) / filename
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
|
||||||
|
|
||||||
|
# Download in a thread to avoid blocking
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
success = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
self._download_file,
|
||||||
|
pdf_url,
|
||||||
|
str(output_path)
|
||||||
|
)
|
||||||
|
|
||||||
|
if success:
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
|
||||||
|
return True, f"Downloaded to: {output_path}"
|
||||||
|
else:
|
||||||
|
return False, "Failed to download PDF"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
|
||||||
|
return False, f"Archive download failed: {str(e)}"
|
||||||
|
|
||||||
|
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||||
|
"""Download via Archive.org borrowing (requires credentials).
|
||||||
|
|
||||||
|
Process (follows archive_client.py pattern):
|
||||||
|
1. Login to Archive.org with credentials
|
||||||
|
2. Call loan endpoint to borrow the book (14-day loan)
|
||||||
|
3. Get book info (page links, metadata)
|
||||||
|
4. Download all pages as images
|
||||||
|
5. Merge images into PDF
|
||||||
|
|
||||||
|
The loan function from archive_client.py handles:
|
||||||
|
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
|
||||||
|
- Creating borrow token for access
|
||||||
|
- Handling borrow failures
|
||||||
|
|
||||||
|
get_book_infos() extracts page links from the borrowed book viewer
|
||||||
|
download() downloads all pages using thread pool
|
||||||
|
img2pdf merges pages into searchable PDF
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from helper.archive_client import credential_openlibrary
|
||||||
|
|
||||||
|
book_id = method.get('book_id', '')
|
||||||
|
|
||||||
|
# Get credentials
|
||||||
|
email, password = credential_openlibrary(self.config)
|
||||||
|
if not email or not password:
|
||||||
|
return False, "Archive.org credentials not configured"
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
|
||||||
|
|
||||||
|
# Login and borrow (in thread, following download_book.py pattern)
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
borrow_result = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
self._archive_borrow_and_download,
|
||||||
|
email,
|
||||||
|
password,
|
||||||
|
book_id,
|
||||||
|
output_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
if borrow_result and isinstance(borrow_result, tuple):
|
||||||
|
success, filepath = borrow_result
|
||||||
|
if success:
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
|
||||||
|
return True, filepath
|
||||||
|
else:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
|
||||||
|
return False, filepath
|
||||||
|
else:
|
||||||
|
return False, "Failed to borrow book from Archive.org"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||||
|
return False, f"Archive borrow failed: {str(e)}"
|
||||||
|
|
||||||
|
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||||
|
"""Download via Libgen search and download with mirror fallback."""
|
||||||
|
try:
|
||||||
|
isbn = method.get('isbn', '')
|
||||||
|
title = method.get('title', '')
|
||||||
|
|
||||||
|
if not isbn and not title:
|
||||||
|
return False, "Need ISBN or title for Libgen search"
|
||||||
|
|
||||||
|
if not self.search_libgen:
|
||||||
|
return False, "Libgen searcher not available"
|
||||||
|
|
||||||
|
# Define wrapper functions to safely call the methods
|
||||||
|
search_func = self.search_libgen
|
||||||
|
if search_func is None:
|
||||||
|
return False, "Search function not available"
|
||||||
|
|
||||||
|
preloaded_results = method.get('results')
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
|
if preloaded_results:
|
||||||
|
results = list(preloaded_results)
|
||||||
|
if not results:
|
||||||
|
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||||
|
else:
|
||||||
|
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
|
||||||
|
return False, f"No Libgen results found for: {isbn or title}"
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
|
||||||
|
|
||||||
|
# Determine output filename (use first result for naming)
|
||||||
|
first_result = results[0]
|
||||||
|
filename = f"{first_result.get('title', 'book')}"
|
||||||
|
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||||||
|
|
||||||
|
# Try each result's mirror until one succeeds
|
||||||
|
for idx, result in enumerate(results, 1):
|
||||||
|
mirror_url = result.get('mirror_url', '')
|
||||||
|
|
||||||
|
if not mirror_url:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Use extension from this result if available
|
||||||
|
extension = result.get('extension', 'pdf')
|
||||||
|
if extension and not extension.startswith('.'):
|
||||||
|
extension = f".{extension}"
|
||||||
|
elif not extension:
|
||||||
|
extension = '.pdf'
|
||||||
|
|
||||||
|
output_path = Path(output_dir) / (filename + extension)
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
|
||||||
|
|
||||||
|
download_func = self.download_from_mirror
|
||||||
|
if download_func is None:
|
||||||
|
return False, "Download function not available"
|
||||||
|
|
||||||
|
download_callable = cast(Callable[[str, str], bool], download_func)
|
||||||
|
|
||||||
|
def download_wrapper():
|
||||||
|
return download_callable(mirror_url, str(output_path))
|
||||||
|
|
||||||
|
# Download (in thread)
|
||||||
|
try:
|
||||||
|
success = await loop.run_in_executor(None, download_wrapper)
|
||||||
|
|
||||||
|
if success:
|
||||||
|
# Validate downloaded file is not HTML (common Libgen issue)
|
||||||
|
if output_path.exists():
|
||||||
|
try:
|
||||||
|
with open(output_path, 'rb') as f:
|
||||||
|
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
|
||||||
|
if '<!doctype' in file_start or '<html' in file_start:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
|
||||||
|
output_path.unlink() # Delete the HTML file
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
|
||||||
|
return True, str(output_path)
|
||||||
|
else:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
|
||||||
|
continue
|
||||||
|
|
||||||
|
return False, f"All {len(results)} mirrors failed"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
|
||||||
|
return False, f"Libgen download failed: {str(e)}"
|
||||||
|
|
||||||
|
async def download_libgen_selection(
|
||||||
|
self,
|
||||||
|
selected: Dict[str, Any],
|
||||||
|
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
output_dir: Optional[str] = None,
|
||||||
|
) -> Tuple[bool, str]:
|
||||||
|
"""Download a specific Libgen result with optional fallbacks."""
|
||||||
|
|
||||||
|
if not isinstance(selected, dict):
|
||||||
|
return False, "Selected result must be a dictionary"
|
||||||
|
|
||||||
|
ordered_results: List[Dict[str, Any]] = [selected]
|
||||||
|
if remaining:
|
||||||
|
for item in remaining:
|
||||||
|
if isinstance(item, dict) and item is not selected:
|
||||||
|
ordered_results.append(item)
|
||||||
|
|
||||||
|
method: Dict[str, Any] = {
|
||||||
|
'type': 'libgen',
|
||||||
|
'isbn': selected.get('isbn', '') or '',
|
||||||
|
'title': selected.get('title', '') or '',
|
||||||
|
'author': selected.get('author', '') or '',
|
||||||
|
'results': ordered_results,
|
||||||
|
}
|
||||||
|
|
||||||
|
return await self.download_book(method, output_dir)
|
||||||
|
|
||||||
|
def download_libgen_selection_sync(
|
||||||
|
self,
|
||||||
|
selected: Dict[str, Any],
|
||||||
|
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
output_dir: Optional[str] = None,
|
||||||
|
) -> Tuple[bool, str]:
|
||||||
|
"""Synchronous helper for downloading a Libgen selection."""
|
||||||
|
|
||||||
|
async def _run() -> Tuple[bool, str]:
|
||||||
|
return await self.download_libgen_selection(selected, remaining, output_dir)
|
||||||
|
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
try:
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
return loop.run_until_complete(_run())
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
asyncio.set_event_loop(None)
|
||||||
|
|
||||||
|
def _download_file(self, url: str, output_path: str) -> bool:
|
||||||
|
"""Download a file from URL."""
|
||||||
|
try:
|
||||||
|
response = requests.get(url, stream=True, timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
with open(output_path, 'wb') as f:
|
||||||
|
for chunk in response.iter_content(chunk_size=8192):
|
||||||
|
if chunk:
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
|
||||||
|
"""Borrow a book from Archive.org and download pages as PDF.
|
||||||
|
|
||||||
|
This follows the exact process from archive_client.py:
|
||||||
|
1. Login with credentials
|
||||||
|
2. Call loan() to create 14-day borrow
|
||||||
|
3. Get book info (extract page URLs)
|
||||||
|
4. Download all pages as images
|
||||||
|
5. Merge images into searchable PDF
|
||||||
|
|
||||||
|
Returns tuple of (success: bool, filepath/message: str)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from helper.archive_client import login, loan, get_book_infos, download
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
|
||||||
|
session = login(email, password)
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
|
||||||
|
# Call loan to create the 14-day borrow
|
||||||
|
session = loan(session, book_id, verbose=True)
|
||||||
|
|
||||||
|
# If we get here, borrowing succeeded
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
|
||||||
|
|
||||||
|
# Now get the book info (page URLs and metadata)
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
|
||||||
|
# Try both URL formats: with /borrow and without
|
||||||
|
book_urls = [
|
||||||
|
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
|
||||||
|
f"https://archive.org/details/{book_id}" # Fallback to details page
|
||||||
|
]
|
||||||
|
|
||||||
|
title = None
|
||||||
|
links = None
|
||||||
|
metadata = None
|
||||||
|
last_error = None
|
||||||
|
|
||||||
|
for book_url in book_urls:
|
||||||
|
try:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
|
||||||
|
response = session.get(book_url, timeout=10)
|
||||||
|
|
||||||
|
# Log response status
|
||||||
|
if response.status_code != 200:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
|
||||||
|
# Continue to try next URL
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Try to parse the response
|
||||||
|
title, links, metadata = get_book_infos(session, book_url)
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
|
||||||
|
last_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
|
if links is None:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
|
||||||
|
# Borrow extraction failed - return False
|
||||||
|
return False, "Could not extract borrowed book pages"
|
||||||
|
|
||||||
|
# Create temporary directory for images
|
||||||
|
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Download all pages (uses thread pool)
|
||||||
|
images = download(
|
||||||
|
session=session,
|
||||||
|
n_threads=10,
|
||||||
|
directory=temp_dir,
|
||||||
|
links=links,
|
||||||
|
scale=3, # Default resolution
|
||||||
|
book_id=book_id
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
|
||||||
|
|
||||||
|
# Try to merge pages into PDF
|
||||||
|
try:
|
||||||
|
import img2pdf
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
|
||||||
|
|
||||||
|
# Prepare PDF metadata
|
||||||
|
pdfmeta = {}
|
||||||
|
if metadata:
|
||||||
|
if "title" in metadata:
|
||||||
|
pdfmeta["title"] = metadata["title"]
|
||||||
|
if "creator" in metadata:
|
||||||
|
pdfmeta["author"] = metadata["creator"]
|
||||||
|
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
|
||||||
|
pdfmeta["creationdate"] = None # Avoid timezone issues
|
||||||
|
|
||||||
|
# Convert images to PDF
|
||||||
|
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
|
||||||
|
if not pdf_content:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
|
||||||
|
return False, "Failed to convert pages to PDF"
|
||||||
|
|
||||||
|
# Save the PDF
|
||||||
|
pdf_filename = f"{title}.pdf" if title else "book.pdf"
|
||||||
|
pdf_path = Path(output_dir) / pdf_filename
|
||||||
|
|
||||||
|
# Handle duplicate filenames
|
||||||
|
i = 1
|
||||||
|
while pdf_path.exists():
|
||||||
|
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
with open(pdf_path, 'wb') as f:
|
||||||
|
f.write(pdf_content)
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
|
||||||
|
|
||||||
|
return True, str(pdf_path)
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
|
||||||
|
|
||||||
|
# Create JPG collection directory
|
||||||
|
if not title:
|
||||||
|
title = f"book_{book_id}"
|
||||||
|
jpg_dir = Path(output_dir) / title
|
||||||
|
i = 1
|
||||||
|
while jpg_dir.exists():
|
||||||
|
jpg_dir = Path(output_dir) / f"{title}({i})"
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Move temporary directory to final location
|
||||||
|
shutil.move(temp_dir, str(jpg_dir))
|
||||||
|
temp_dir = None # Mark as already moved
|
||||||
|
|
||||||
|
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
|
||||||
|
return True, str(jpg_dir)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up temporary directory if it still exists
|
||||||
|
if temp_dir and Path(temp_dir).exists():
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
|
except SystemExit:
|
||||||
|
# loan() function calls sys.exit on failure - catch it
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
|
||||||
|
return False, "Book could not be borrowed (may not be available for borrowing)"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||||
|
return False, f"Borrow failed: {str(e)}"
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Close the session."""
|
||||||
|
self.session.close()
|
||||||
492
helper/utils.py
Normal file
492
helper/utils.py
Normal file
@@ -0,0 +1,492 @@
|
|||||||
|
"""General-purpose helpers used across the downlow CLI."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
import ffmpeg
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Iterable
|
||||||
|
from datetime import datetime
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from fnmatch import fnmatch
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import helper.utils_constant
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cbor2
|
||||||
|
except ImportError:
|
||||||
|
cbor2 = None # type: ignore
|
||||||
|
|
||||||
|
CHUNK_SIZE = 1024 * 1024 # 1 MiB
|
||||||
|
_format_logger = logging.getLogger(__name__)
|
||||||
|
def ensure_directory(path: Path) -> None:
|
||||||
|
"""Ensure *path* exists as a directory."""
|
||||||
|
try:
|
||||||
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
except OSError as exc: # pragma: no cover - surfaced to caller
|
||||||
|
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
|
||||||
|
def unique_path(path: Path) -> Path:
|
||||||
|
"""Return a unique path by appending " (n)" if needed."""
|
||||||
|
if not path.exists():
|
||||||
|
return path
|
||||||
|
stem = path.stem
|
||||||
|
suffix = path.suffix
|
||||||
|
parent = path.parent
|
||||||
|
counter = 1
|
||||||
|
while True:
|
||||||
|
candidate = parent / f"{stem} ({counter}){suffix}"
|
||||||
|
if not candidate.exists():
|
||||||
|
return candidate
|
||||||
|
counter += 1
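# Example: when "exports/report.pdf" already exists, unique_path yields
# "exports/report (1).pdf", then "exports/report (2).pdf", and so on.
target = unique_path(Path("exports") / "report.pdf")
print(target)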
|
||||||
|
|
||||||
|
def sanitize_metadata_value(value: Any) -> str | None:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
if not isinstance(value, str):
|
||||||
|
value = str(value)
|
||||||
|
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
return value
|
||||||
|
def unique_preserve_order(values: Iterable[str]) -> list[str]:
|
||||||
|
seen: set[str] = set()
|
||||||
|
ordered: list[str] = []
|
||||||
|
for value in values:
|
||||||
|
if value not in seen:
|
||||||
|
seen.add(value)
|
||||||
|
ordered.append(value)
|
||||||
|
return ordered
|
||||||
|
def sha256_file(file_path: Path) -> str:
|
||||||
|
"""Return the SHA-256 hex digest of *path*."""
|
||||||
|
hasher = hashlib.sha256()
|
||||||
|
with file_path.open('rb') as handle:
|
||||||
|
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
|
||||||
|
hasher.update(chunk)
|
||||||
|
return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
|
||||||
|
"""Create a .metadata sidecar file with JSON metadata.
|
||||||
|
|
||||||
|
The metadata dict should contain a title. If not present, one is derived from
|
||||||
|
the filename. This ensures the .metadata file can be matched during batch import.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to the exported file
|
||||||
|
metadata: Dictionary of metadata to save
|
||||||
|
"""
|
||||||
|
if not metadata:
|
||||||
|
return
|
||||||
|
file_name = file_path.stem
|
||||||
|
file_ext = file_path.suffix.lower()
|
||||||
|
# Ensure metadata has a title field that matches the filename (without extension)
|
||||||
|
# This allows the sidecar to be matched and imported properly during batch import
|
||||||
|
if 'title' not in metadata or not metadata.get('title'):
|
||||||
|
metadata['title'] = file_name
|
||||||
|
metadata['hash'] = sha256_file(file_path)
|
||||||
|
metadata['size'] = Path(file_path).stat().st_size
|
||||||
|
format_found = False
|
||||||
|
for mime_type, ext_map in helper.utils_constant.mime_maps.items():
|
||||||
|
for key, info in ext_map.items():
|
||||||
|
if info.get("ext") == file_ext:
|
||||||
|
metadata['type'] = mime_type
|
||||||
|
format_found = True
|
||||||
|
break
|
||||||
|
if format_found:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
metadata['type'] = 'unknown'
|
||||||
|
try:
    # ffprobe succeeds only for media files; non-media exports keep the basic metadata.
    metadata.update(ffprobe(str(file_path)))
except Exception:
    pass
|
||||||
|
|
||||||
|
|
||||||
|
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
|
||||||
|
try:
|
||||||
|
with open(metadata_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
||||||
|
except OSError as exc:
|
||||||
|
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
|
||||||
|
|
||||||
|
def create_tags_sidecar(file_path: Path, tags: set) -> None:
|
||||||
|
"""Create a .tags sidecar file with tags (one per line).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to the exported file
|
||||||
|
tags: Set of tag strings
|
||||||
|
"""
|
||||||
|
if not tags:
|
||||||
|
return
|
||||||
|
|
||||||
|
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
|
||||||
|
try:
|
||||||
|
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||||
|
for tag in sorted(tags):
|
||||||
|
f.write(f"{tag}\n")
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def ffprobe(file_path: str) -> dict:
|
||||||
|
"""Probe *file_path* with ffmpeg and return a flat dict of common media fields."""
probe = ffmpeg.probe(file_path)
|
||||||
|
metadata = {}
|
||||||
|
|
||||||
|
# Format-level info
|
||||||
|
fmt = probe.get("format", {})
|
||||||
|
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
|
||||||
|
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
|
||||||
|
metadata["format_name"] = fmt.get("format_name", None)
|
||||||
|
|
||||||
|
# Stream-level info
|
||||||
|
for stream in probe.get("streams", []):
|
||||||
|
codec_type = stream.get("codec_type")
|
||||||
|
if codec_type == "audio":
|
||||||
|
metadata["audio_codec"] = stream.get("codec_name")
|
||||||
|
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
|
||||||
|
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
|
||||||
|
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
|
||||||
|
elif codec_type == "video":
|
||||||
|
metadata["video_codec"] = stream.get("codec_name")
|
||||||
|
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||||
|
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||||
|
elif codec_type == "image":
|
||||||
|
metadata["image_codec"] = stream.get("codec_name")
|
||||||
|
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||||
|
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||||
|
|
||||||
|
return metadata
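# Illustrative shape of the flattened dict returned for a typical video file
# (all values below are made up):
#   {"duration": 3601.0, "size": 734003200, "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
#    "video_codec": "h264", "width": 1920, "height": 1080,
#    "audio_codec": "aac", "bitrate": 128000, "samplerate": 44100, "channels": 2}
info = ffprobe("exports/lecture.mp4")
print(info.get("video_codec"), info.get("width"), info.get("height"))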
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# CBOR Utilities - Consolidated from cbor.py
|
||||||
|
# ============================================================================
|
||||||
|
"""CBOR utilities backed by the `cbor2` library."""
|
||||||
|
|
||||||
|
|
||||||
|
def decode_cbor(data: bytes) -> Any:
|
||||||
|
"""Decode *data* from CBOR into native Python objects."""
|
||||||
|
if not data:
|
||||||
|
return None
|
||||||
|
if cbor2 is None:
|
||||||
|
raise ImportError("cbor2 library is required for CBOR decoding")
|
||||||
|
return cbor2.loads(data)
|
||||||
|
|
||||||
|
|
||||||
|
def jsonify(value: Any) -> Any:
|
||||||
|
"""Convert *value* into a JSON-friendly structure."""
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return {str(key): jsonify(val) for key, val in value.items()}
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [jsonify(item) for item in value]
|
||||||
|
if isinstance(value, bytes):
|
||||||
|
return {"__bytes__": base64.b64encode(value).decode("ascii")}
|
||||||
|
return value
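# Round-trip sketch: decode a CBOR payload and make it JSON-serialisable.
# The payload is illustrative and requires the optional cbor2 dependency.
if cbor2 is not None:
    payload = cbor2.dumps({"id": 7, "blob": b"\x00\x01"})
    decoded = decode_cbor(payload)
    print(json.dumps(jsonify(decoded)))  # bytes become {"__bytes__": "<base64>"}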
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Format Utilities - Consolidated from format_utils.py
|
||||||
|
# ============================================================================
|
||||||
|
"""Formatting utilities for displaying metadata consistently across the application."""
|
||||||
|
|
||||||
|
|
||||||
|
def format_bytes(bytes_value) -> str:
|
||||||
|
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
|
||||||
|
|
||||||
|
Args:
|
||||||
|
bytes_value: Size in bytes (int or float)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string like '1.5 MB' or '756 MB'
|
||||||
|
"""
|
||||||
|
if bytes_value is None or (isinstance(bytes_value, (int, float)) and bytes_value <= 0):
|
||||||
|
return "0 B"
|
||||||
|
|
||||||
|
if isinstance(bytes_value, (int, float)):
|
||||||
|
for unit in ("B", "KB", "MB", "GB", "TB"):
|
||||||
|
if bytes_value < 1024:
|
||||||
|
if unit == "B":
|
||||||
|
return f"{int(bytes_value)} {unit}"
|
||||||
|
return f"{bytes_value:.1f} {unit}"
|
||||||
|
bytes_value /= 1024
|
||||||
|
return f"{bytes_value:.1f} PB"
|
||||||
|
return str(bytes_value)
|
||||||
|
|
||||||
|
|
||||||
|
def format_duration(seconds) -> str:
|
||||||
|
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
|
||||||
|
|
||||||
|
Args:
|
||||||
|
seconds: Duration in seconds (int or float)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string like '1:23:45' or '5:30'
|
||||||
|
"""
|
||||||
|
if seconds is None or seconds == '':
|
||||||
|
return "N/A"
|
||||||
|
|
||||||
|
if isinstance(seconds, str):
|
||||||
|
try:
|
||||||
|
seconds = float(seconds)
|
||||||
|
except ValueError:
|
||||||
|
return str(seconds)
|
||||||
|
|
||||||
|
if not isinstance(seconds, (int, float)):
|
||||||
|
return str(seconds)
|
||||||
|
|
||||||
|
total_seconds = int(seconds)
|
||||||
|
if total_seconds < 0:
|
||||||
|
return "N/A"
|
||||||
|
|
||||||
|
hours = total_seconds // 3600
|
||||||
|
minutes = (total_seconds % 3600) // 60
|
||||||
|
secs = total_seconds % 60
|
||||||
|
|
||||||
|
if hours > 0:
|
||||||
|
return f"{hours}:{minutes:02d}:{secs:02d}"
|
||||||
|
elif minutes > 0:
|
||||||
|
return f"{minutes}:{secs:02d}"
|
||||||
|
else:
|
||||||
|
return f"{secs}s"
|
||||||
|
|
||||||
|
|
||||||
|
def format_timestamp(timestamp_str) -> str:
|
||||||
|
"""Format ISO timestamp to readable format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
timestamp_str: ISO format timestamp string or None
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
|
||||||
|
"""
|
||||||
|
if not timestamp_str:
|
||||||
|
return "N/A"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Handle ISO format timestamps
|
||||||
|
if isinstance(timestamp_str, str):
|
||||||
|
# Try parsing ISO format
|
||||||
|
if 'T' in timestamp_str:
|
||||||
|
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
||||||
|
else:
|
||||||
|
# Try other common formats
|
||||||
|
dt = datetime.fromisoformat(timestamp_str)
|
||||||
|
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
except Exception as e:
|
||||||
|
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
|
||||||
|
|
||||||
|
return str(timestamp_str)
|
||||||
|
|
||||||
|
|
||||||
|
def format_metadata_value(key: str, value) -> str:
|
||||||
|
"""Format a metadata value based on its key for display.
|
||||||
|
|
||||||
|
This is the central formatting rule for all metadata display.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key: Metadata field name
|
||||||
|
value: Value to format
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string for display
|
||||||
|
"""
|
||||||
|
if value is None or value == '':
|
||||||
|
return "N/A"
|
||||||
|
|
||||||
|
# Apply field-specific formatting
|
||||||
|
if key in ('size', 'file_size'):
|
||||||
|
return format_bytes(value)
|
||||||
|
elif key in ('duration', 'length'):
|
||||||
|
return format_duration(value)
|
||||||
|
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
|
||||||
|
return format_timestamp(value)
|
||||||
|
else:
|
||||||
|
return str(value)
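# Example: rendering a metadata row with the field-aware formatter.
row = {"size": 1572864, "duration": 330, "time_imported": "2025-10-28T19:36:01"}
for key, value in row.items():
    print(f"{key}: {format_metadata_value(key, value)}")
# size: 1.5 MB, duration: 5:30, time_imported: 2025-10-28 19:36:01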
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Link Utilities - Consolidated from link_utils.py
|
||||||
|
# ============================================================================
|
||||||
|
"""Link utilities - Extract and process URLs from various sources."""
|
||||||
|
|
||||||
|
|
||||||
|
def extract_link_from_args(args: Iterable[str]) -> str | None:
|
||||||
|
"""Extract HTTP/HTTPS URL from command arguments.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
args: Command arguments
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL string if found, None otherwise
|
||||||
|
"""
|
||||||
|
args_list = list(args) if not isinstance(args, (list, tuple)) else args
|
||||||
|
if not args_list:
|
||||||
|
return None
|
||||||
|
|
||||||
|
potential_link = str(args_list[0])
|
||||||
|
if potential_link.startswith(('http://', 'https://')):
|
||||||
|
return potential_link
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_link_from_result(result: Any) -> Any | None:
|
||||||
|
"""Extract URL from a result object (dict or object with attributes).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result: Result object from pipeline (dict or object)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL string if found, None otherwise
|
||||||
|
"""
|
||||||
|
if isinstance(result, dict):
|
||||||
|
return result.get('url') or result.get('link') or result.get('href')
|
||||||
|
|
||||||
|
return (
|
||||||
|
getattr(result, 'url', None) or
|
||||||
|
getattr(result, 'link', None) or
|
||||||
|
getattr(result, 'href', None)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
|
||||||
|
"""Extract link from args or result (args take priority).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result: Pipeline result object
|
||||||
|
args: Command arguments
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL string if found, None otherwise
|
||||||
|
"""
|
||||||
|
# Try args first
|
||||||
|
link = extract_link_from_args(args)
|
||||||
|
if link:
|
||||||
|
return link
|
||||||
|
|
||||||
|
# Fall back to result
|
||||||
|
return extract_link_from_result(result)
|
||||||
|
|
||||||
|
|
||||||
|
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
|
||||||
|
"""Get API key from config with fallback support.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Configuration dictionary
|
||||||
|
service: Service name for logging
|
||||||
|
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
API key if found and not empty, None otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
parts = key_path.split('.')
|
||||||
|
value = config
|
||||||
|
for part in parts:
|
||||||
|
if isinstance(value, dict):
|
||||||
|
value = value.get(part)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if isinstance(value, str):
|
||||||
|
return value.strip() or None
|
||||||
|
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
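# Example: reading a nested key with the dot-notation path from the docstring.
# The config dict is illustrative.
config = {"Debrid": {"All-debrid": "  my-api-key  "}}
print(get_api_key(config, "debrid", "Debrid.All-debrid"))   # -> "my-api-key"
print(get_api_key(config, "debrid", "Debrid.Real-debrid"))  # -> None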
|
||||||
|
|
||||||
|
|
||||||
|
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
|
||||||
|
"""Add direct link information to result object.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result: Result object to modify (dict or object)
|
||||||
|
direct_link: The unlocked/direct URL
|
||||||
|
original_link: The original restricted URL
|
||||||
|
"""
|
||||||
|
if isinstance(result, dict):
|
||||||
|
result['direct_link'] = direct_link
|
||||||
|
result['original_link'] = original_link
|
||||||
|
else:
|
||||||
|
setattr(result, 'direct_link', direct_link)
|
||||||
|
setattr(result, 'original_link', original_link)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# URL Policy Resolution - Consolidated from url_parser.py
|
||||||
|
# ============================================================================
|
||||||
|
"""URL policy resolution for downlow workflows."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class UrlPolicy:
|
||||||
|
"""Describe how a URL should be handled by download and screenshot flows."""
|
||||||
|
|
||||||
|
skip_download: bool = False
|
||||||
|
skip_metadata: bool = False
|
||||||
|
force_screenshot: bool = False
|
||||||
|
extra_tags: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
def apply_tags(self, sources: Iterable[str]) -> list[str]:
|
||||||
|
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
|
||||||
|
for value in sources:
|
||||||
|
text = str(value).strip()
|
||||||
|
if text:
|
||||||
|
tags.append(text)
|
||||||
|
return tags
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
|
||||||
|
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
|
||||||
|
if not pattern:
|
||||||
|
return None
|
||||||
|
skip_download = bool(rule.get("skip_download"))
|
||||||
|
skip_metadata = bool(rule.get("skip_metadata"))
|
||||||
|
force_screenshot = bool(rule.get("force_screenshot"))
|
||||||
|
extra_tags_raw = rule.get("extra_tags")
|
||||||
|
if isinstance(extra_tags_raw, str):
|
||||||
|
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
|
||||||
|
elif isinstance(extra_tags_raw, (list, tuple, set)):
|
||||||
|
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
|
||||||
|
else:
|
||||||
|
extra_tags = []
|
||||||
|
return {
|
||||||
|
"pattern": pattern,
|
||||||
|
"skip_download": skip_download,
|
||||||
|
"skip_metadata": skip_metadata,
|
||||||
|
"force_screenshot": force_screenshot,
|
||||||
|
"extra_tags": extra_tags,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
|
||||||
|
policies_raw = config.get("url_policies")
|
||||||
|
if not policies_raw:
|
||||||
|
return UrlPolicy()
|
||||||
|
if not isinstance(policies_raw, list):
|
||||||
|
return UrlPolicy()
|
||||||
|
parsed = urlparse(url)
|
||||||
|
subject = f"{parsed.netloc}{parsed.path}"
|
||||||
|
host = parsed.netloc
|
||||||
|
resolved = UrlPolicy()
|
||||||
|
for rule_raw in policies_raw:
|
||||||
|
if not isinstance(rule_raw, dict):
|
||||||
|
continue
|
||||||
|
rule = _normalise_rule(rule_raw)
|
||||||
|
if rule is None:
|
||||||
|
continue
|
||||||
|
pattern = rule["pattern"]
|
||||||
|
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
|
||||||
|
continue
|
||||||
|
if rule["skip_download"]:
|
||||||
|
resolved.skip_download = True
|
||||||
|
if rule["skip_metadata"]:
|
||||||
|
resolved.skip_metadata = True
|
||||||
|
if rule["force_screenshot"]:
|
||||||
|
resolved.force_screenshot = True
|
||||||
|
if rule["extra_tags"]:
|
||||||
|
for tag in rule["extra_tags"]:
|
||||||
|
if tag not in resolved.extra_tags:
|
||||||
|
resolved.extra_tags.append(tag)
|
||||||
|
return resolved
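# Example: one wildcard rule matched against the host. Hosts, tags and the
# config layout below are illustrative.
config = {
    "url_policies": [
        {"pattern": "*.example.com", "skip_download": True, "extra_tags": "mirror, untrusted"},
    ]
}
policy = resolve_url_policy(config, "https://cdn.example.com/file.zip")
print(policy.skip_download, policy.extra_tags)   # True ['mirror', 'untrusted']
print(policy.apply_tags(["site:example"]))       # ['mirror', 'untrusted', 'site:example']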
|
||||||
79
helper/utils_constant.py
Normal file
79
helper/utils_constant.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
mime_maps = {
|
||||||
|
"image": {
|
||||||
|
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
|
||||||
|
"png": { "ext": ".png", "mimes": ["image/png"] },
|
||||||
|
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
|
||||||
|
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
|
||||||
|
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
|
||||||
|
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
|
||||||
|
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
|
||||||
|
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
|
||||||
|
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
|
||||||
|
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
|
||||||
|
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
|
||||||
|
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
|
||||||
|
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
|
||||||
|
},
|
||||||
|
"image_sequence": {
|
||||||
|
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
|
||||||
|
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
|
||||||
|
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
|
||||||
|
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
|
||||||
|
},
|
||||||
|
"video": {
|
||||||
|
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
|
||||||
|
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
|
||||||
|
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
|
||||||
|
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
|
||||||
|
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
|
||||||
|
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
|
||||||
|
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
|
||||||
|
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
|
||||||
|
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
|
||||||
|
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
|
||||||
|
},
|
||||||
|
"audio": {
|
||||||
|
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
||||||
|
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
||||||
|
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
||||||
|
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
||||||
|
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
||||||
|
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
||||||
|
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
|
||||||
|
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
|
||||||
|
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
|
||||||
|
},
|
||||||
|
"document": {
|
||||||
|
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
|
||||||
|
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
|
||||||
|
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
|
||||||
|
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
|
||||||
|
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
|
||||||
|
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
|
||||||
|
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
|
||||||
|
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
|
||||||
|
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
|
||||||
|
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
|
||||||
|
},
|
||||||
|
"archive": {
|
||||||
|
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
|
||||||
|
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
|
||||||
|
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
|
||||||
|
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
|
||||||
|
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
|
||||||
|
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
|
||||||
|
},
|
||||||
|
"project": {
|
||||||
|
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
|
||||||
|
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
|
||||||
|
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
|
||||||
|
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
|
||||||
|
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
|
||||||
|
},
|
||||||
|
"other": {
|
||||||
|
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
|
||||||
|
"json": { "ext": ".json", "mimes": ["application/json"] },
|
||||||
|
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
|
||||||
|
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
|
||||||
|
}
|
||||||
|
}
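# Illustrative helper (not part of this module): map a MIME type back to its
# canonical extension, mirroring how create_metadata_sidecar walks this table
# from extension to type.
def ext_for_mime(mime):
    for group in mime_maps.values():
        for info in group.values():
            if mime in info["mimes"]:
                return info["ext"]
    return None

print(ext_for_mime("video/webm"))  # ".webm"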
|
||||||
655
helper/worker_manager.py
Normal file
655
helper/worker_manager.py
Normal file
@@ -0,0 +1,655 @@
|
|||||||
|
"""Worker task management with persistent database storage.
|
||||||
|
|
||||||
|
Manages worker tasks for downloads, searches, imports, etc. with automatic
|
||||||
|
persistence to database and optional auto-refresh callbacks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict, Any, List, Callable
|
||||||
|
from datetime import datetime
|
||||||
|
from threading import Thread, Lock
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .local_library import LocalLibraryDB
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Worker:
|
||||||
|
"""Represents a single worker task with state management."""
|
||||||
|
|
||||||
|
def __init__(self, worker_id: str, worker_type: str, title: str = "",
|
||||||
|
description: str = "", manager: Optional['WorkerManager'] = None):
|
||||||
|
"""Initialize a worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for this worker
|
||||||
|
worker_type: Type of work (e.g., 'download', 'search', 'import')
|
||||||
|
title: Human-readable title
|
||||||
|
description: Detailed description
|
||||||
|
manager: Reference to parent WorkerManager for state updates
|
||||||
|
"""
|
||||||
|
self.id = worker_id
|
||||||
|
self.type = worker_type
|
||||||
|
self.title = title or worker_type
|
||||||
|
self.description = description
|
||||||
|
self.manager = manager
|
||||||
|
self.status = "running"
|
||||||
|
self.progress = ""
|
||||||
|
self.details = ""
|
||||||
|
self.error_message = ""
|
||||||
|
self.result = "pending"
|
||||||
|
self._stdout_buffer = []
|
||||||
|
self._steps_buffer = []
|
||||||
|
|
||||||
|
def log_step(self, step_text: str) -> None:
|
||||||
|
"""Log a step for this worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
step_text: Text describing the step
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
self.manager.log_step(self.id, step_text)
|
||||||
|
else:
|
||||||
|
logger.info(f"[{self.id}] {step_text}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error logging step for worker {self.id}: {e}")
|
||||||
|
|
||||||
|
def append_stdout(self, text: str) -> None:
|
||||||
|
"""Append text to stdout log.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: Text to append
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
self.manager.append_worker_stdout(self.id, text)
|
||||||
|
else:
|
||||||
|
self._stdout_buffer.append(text)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error appending stdout for worker {self.id}: {e}")
|
||||||
|
|
||||||
|
def get_stdout(self) -> str:
|
||||||
|
"""Get all stdout for this worker.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Complete stdout text
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
return self.manager.get_stdout(self.id)
|
||||||
|
else:
|
||||||
|
return "\n".join(self._stdout_buffer)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting stdout for worker {self.id}: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def get_steps(self) -> str:
|
||||||
|
"""Get all steps for this worker.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Complete steps text
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
return self.manager.get_steps(self.id)
|
||||||
|
else:
|
||||||
|
return "\n".join(self._steps_buffer)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||||||
|
"""Update worker progress.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
progress: Progress string (e.g., "50%")
|
||||||
|
details: Additional details
|
||||||
|
"""
|
||||||
|
self.progress = progress
|
||||||
|
self.details = details
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
self.manager.update_worker(self.id, progress, details)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating worker {self.id}: {e}")
|
||||||
|
|
||||||
|
def finish(self, result: str = "completed", message: str = "") -> None:
|
||||||
|
"""Mark worker as finished.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result: Result status ('completed', 'error', 'cancelled')
|
||||||
|
message: Result message/error details
|
||||||
|
"""
|
||||||
|
self.result = result
|
||||||
|
self.status = "finished"
|
||||||
|
self.error_message = message
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
# Flush and disable logging handler before marking finished
|
||||||
|
self.manager.disable_logging_for_worker(self.id)
|
||||||
|
# Then mark as finished in database
|
||||||
|
self.manager.finish_worker(self.id, result, message)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error finishing worker {self.id}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
class WorkerLoggingHandler(logging.StreamHandler):
|
||||||
|
"""Custom logging handler that captures logs for a worker."""
|
||||||
|
|
||||||
|
def __init__(self, worker_id: str, db: LocalLibraryDB,
|
||||||
|
manager: Optional['WorkerManager'] = None,
|
||||||
|
buffer_size: int = 50):
|
||||||
|
"""Initialize the handler.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: ID of the worker to capture logs for
|
||||||
|
db: Reference to LocalLibraryDB for storing logs
manager: Optional parent WorkerManager used to route captured log output
|
||||||
|
buffer_size: Number of logs to buffer before flushing to DB
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
self.worker_id = worker_id
|
||||||
|
self.db = db
|
||||||
|
self.manager = manager
|
||||||
|
self.buffer_size = buffer_size
|
||||||
|
self.buffer = []
|
||||||
|
self._lock = Lock()
|
||||||
|
|
||||||
|
# Set a format that includes timestamp and level
|
||||||
|
formatter = logging.Formatter(
|
||||||
|
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||||||
|
)
|
||||||
|
self.setFormatter(formatter)
|
||||||
|
|
||||||
|
def emit(self, record):
|
||||||
|
"""Emit a log record."""
|
||||||
|
try:
|
||||||
|
# Try to format the record normally
|
||||||
|
try:
|
||||||
|
msg = self.format(record)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# If formatting fails (e.g., %d format with non-int arg),
|
||||||
|
# build message manually without calling getMessage()
|
||||||
|
try:
|
||||||
|
# Try to format with args if possible
|
||||||
|
if record.args:
|
||||||
|
msg = record.msg % record.args
|
||||||
|
else:
|
||||||
|
msg = record.msg
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# If that fails too, just use the raw message string
|
||||||
|
msg = str(record.msg)
|
||||||
|
|
||||||
|
# Add timestamp and level if not already in message
|
||||||
|
import time
|
||||||
|
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
|
||||||
|
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
self.buffer.append(msg)
|
||||||
|
|
||||||
|
# Flush to DB when buffer reaches size
|
||||||
|
if len(self.buffer) >= self.buffer_size:
|
||||||
|
self._flush()
|
||||||
|
except Exception:
|
||||||
|
self.handleError(record)
|
||||||
|
|
||||||
|
def _flush(self):
|
||||||
|
"""Flush buffered logs to database."""
|
||||||
|
if self.buffer:
|
||||||
|
log_text = '\n'.join(self.buffer)
|
||||||
|
try:
|
||||||
|
if self.manager:
|
||||||
|
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||||
|
else:
|
||||||
|
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||||
|
except Exception as e:
|
||||||
|
# If we can't write to DB, at least log it
|
||||||
|
log(f"Error flushing worker logs: {e}")
|
||||||
|
self.buffer = []
|
||||||
|
|
||||||
|
def flush(self):
|
||||||
|
"""Flush any buffered records."""
|
||||||
|
with self._lock:
|
||||||
|
self._flush()
|
||||||
|
super().flush()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
"""Close the handler."""
|
||||||
|
self.flush()
|
||||||
|
super().close()
|
||||||
|
|
||||||
|
|
||||||
|
class WorkerManager:
|
||||||
|
"""Manages persistent worker tasks with auto-refresh capability."""
|
||||||
|
|
||||||
|
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
|
||||||
|
"""Initialize the worker manager.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
library_root: Root directory for the local library database
|
||||||
|
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
|
||||||
|
"""
|
||||||
|
self.library_root = Path(library_root)
|
||||||
|
self.db = LocalLibraryDB(library_root)
|
||||||
|
self.auto_refresh_interval = auto_refresh_interval
|
||||||
|
self.refresh_callbacks: List[Callable] = []
|
||||||
|
self.refresh_thread: Optional[Thread] = None
|
||||||
|
self._stop_refresh = False
|
||||||
|
self._lock = Lock()
|
||||||
|
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
|
||||||
|
self._worker_last_step: Dict[str, str] = {}
|
||||||
|
|
||||||
|
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
|
||||||
|
"""Register a callback to be called on worker updates.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
callback: Function that receives list of active workers
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
self.refresh_callbacks.append(callback)
|
||||||
|
|
||||||
|
def expire_running_workers(
|
||||||
|
self,
|
||||||
|
older_than_seconds: int = 300,
|
||||||
|
worker_id_prefix: Optional[str] = None,
|
||||||
|
reason: Optional[str] = None,
|
||||||
|
status: str = "error",
|
||||||
|
) -> int:
|
||||||
|
"""Mark stale running workers as finished.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
older_than_seconds: Idle threshold before expiring.
|
||||||
|
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
|
||||||
|
reason: Error message if none already exists.
|
||||||
|
status: New status to apply.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Count of workers updated.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.expire_running_workers(
|
||||||
|
older_than_seconds=older_than_seconds,
|
||||||
|
status=status,
|
||||||
|
reason=reason,
|
||||||
|
worker_id_prefix=worker_id_prefix,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def remove_refresh_callback(self, callback: Callable) -> None:
|
||||||
|
"""Remove a refresh callback.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
callback: The callback function to remove
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
if callback in self.refresh_callbacks:
|
||||||
|
self.refresh_callbacks.remove(callback)
|
||||||
|
|
||||||
|
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
|
||||||
|
"""Enable logging capture for a worker.
|
||||||
|
|
||||||
|
Creates a logging handler that captures all logs for this worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: ID of the worker to capture logs for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The logging handler that was created, or None if there was an error
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
|
||||||
|
with self._lock:
|
||||||
|
self.worker_handlers[worker_id] = handler
|
||||||
|
|
||||||
|
# Add the handler to the root logger so it captures all logs
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
root_logger.addHandler(handler)
|
||||||
|
root_logger.setLevel(logging.DEBUG) # Capture all levels
|
||||||
|
|
||||||
|
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
|
||||||
|
return handler
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def disable_logging_for_worker(self, worker_id: str) -> None:
|
||||||
|
"""Disable logging capture for a worker and flush any pending logs.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: ID of the worker to stop capturing logs for
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self._lock:
|
||||||
|
handler = self.worker_handlers.pop(worker_id, None)
|
||||||
|
|
||||||
|
if handler:
|
||||||
|
# Flush and close the handler
|
||||||
|
handler.flush()
|
||||||
|
handler.close()
|
||||||
|
|
||||||
|
# Remove from root logger
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
root_logger.removeHandler(handler)
|
||||||
|
|
||||||
|
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||||
|
|
||||||
|
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
|
||||||
|
description: str = "", total_steps: int = 0,
|
||||||
|
pipe: Optional[str] = None) -> bool:
|
||||||
|
"""Start tracking a new worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
worker_type: Type of worker (e.g., 'download', 'search', 'import')
|
||||||
|
title: Worker title/name
|
||||||
|
description: Worker description
|
||||||
|
total_steps: Total number of steps for progress tracking
|
||||||
|
pipe: Text of the originating pipe/prompt, if any
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if worker was inserted successfully
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
|
||||||
|
if result > 0:
|
||||||
|
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
|
||||||
|
self._start_refresh_if_needed()
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
|
||||||
|
details: str = "", error: str = "") -> bool:
|
||||||
|
"""Update worker progress and status.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
progress: Progress percentage (0-100)
|
||||||
|
current_step: Current step description
|
||||||
|
details: Additional details
|
||||||
|
error: Error message if any
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if update was successful
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
kwargs = {}
|
||||||
|
if progress > 0:
|
||||||
|
kwargs['progress'] = progress
|
||||||
|
if current_step:
|
||||||
|
kwargs['current_step'] = current_step
|
||||||
|
if details:
|
||||||
|
kwargs['description'] = details
|
||||||
|
if error:
|
||||||
|
kwargs['error_message'] = error
|
||||||
|
|
||||||
|
if kwargs:
|
||||||
|
kwargs['last_updated'] = datetime.now().isoformat()
|
||||||
|
if 'current_step' in kwargs and kwargs['current_step']:
|
||||||
|
self._worker_last_step[worker_id] = str(kwargs['current_step'])
|
||||||
|
return self.db.update_worker(worker_id, **kwargs)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def finish_worker(self, worker_id: str, result: str = "completed",
|
||||||
|
error_msg: str = "", result_data: str = "") -> bool:
|
||||||
|
"""Mark a worker as finished.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
result: Result status ('completed', 'error', 'cancelled')
|
||||||
|
error_msg: Error message if any
|
||||||
|
result_data: Result data as JSON string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if update was successful
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
kwargs = {
|
||||||
|
'status': result,
|
||||||
|
'completed_at': datetime.now().isoformat()
|
||||||
|
}
|
||||||
|
if error_msg:
|
||||||
|
kwargs['error_message'] = error_msg
|
||||||
|
if result_data:
|
||||||
|
kwargs['result_data'] = result_data
|
||||||
|
|
||||||
|
success = self.db.update_worker(worker_id, **kwargs)
|
||||||
|
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
|
||||||
|
self._worker_last_step.pop(worker_id, None)
|
||||||
|
return success
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_active_workers(self) -> List[Dict[str, Any]]:
|
||||||
|
"""Get all active (running) workers.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of active worker dictionaries
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.get_active_workers()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
|
|
||||||
|
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||||||
|
"""Get all finished workers (completed, errored, or cancelled).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
limit: Maximum number of workers to retrieve
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of finished worker dictionaries
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
all_workers = self.db.get_all_workers(limit=limit)
|
||||||
|
# Filter to only finished workers
|
||||||
|
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
|
||||||
|
return finished
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
|
|
||||||
|
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Get a specific worker's data.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Worker data or None if not found
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.get_worker(worker_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
|
||||||
|
"""Fetch recorded worker timeline events."""
|
||||||
|
return self.db.get_worker_events(worker_id, limit)
|
||||||
|
|
||||||
|
def log_step(self, worker_id: str, step_text: str) -> bool:
|
||||||
|
"""Log a step to a worker's step history.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
step_text: Step description to log
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if successful
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
success = self.db.append_worker_steps(worker_id, step_text)
|
||||||
|
if success:
|
||||||
|
self._worker_last_step[worker_id] = step_text
|
||||||
|
return success
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _get_last_step(self, worker_id: str) -> Optional[str]:
|
||||||
|
"""Return the most recent step description for a worker."""
|
||||||
|
return self._worker_last_step.get(worker_id)
|
||||||
|
|
||||||
|
def get_steps(self, worker_id: str) -> str:
|
||||||
|
"""Get step logs for a worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Steps text or empty string if not found
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.get_worker_steps(worker_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def start_auto_refresh(self) -> None:
|
||||||
|
"""Start the auto-refresh thread for periodic worker updates."""
|
||||||
|
if self.auto_refresh_interval <= 0:
|
||||||
|
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
|
||||||
|
return
|
||||||
|
|
||||||
|
if self.refresh_thread and self.refresh_thread.is_alive():
|
||||||
|
logger.debug("[WorkerManager] Auto-refresh already running")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
|
||||||
|
self._stop_refresh = False
|
||||||
|
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
|
||||||
|
self.refresh_thread.start()
|
||||||
|
|
||||||
|
def stop_auto_refresh(self) -> None:
|
||||||
|
"""Stop the auto-refresh thread."""
|
||||||
|
logger.info("[WorkerManager] Stopping auto-refresh")
|
||||||
|
self._stop_refresh = True
|
||||||
|
if self.refresh_thread:
|
||||||
|
self.refresh_thread.join(timeout=5)
|
||||||
|
self.refresh_thread = None
|
||||||
|
|
||||||
|
def _start_refresh_if_needed(self) -> None:
|
||||||
|
"""Start auto-refresh if we have active workers and callbacks."""
|
||||||
|
active = self.get_active_workers()
|
||||||
|
if active and self.refresh_callbacks and not self._stop_refresh:
|
||||||
|
self.start_auto_refresh()
|
||||||
|
|
||||||
|
def _auto_refresh_loop(self) -> None:
|
||||||
|
"""Main auto-refresh loop that periodically queries and notifies."""
|
||||||
|
try:
|
||||||
|
while not self._stop_refresh:
|
||||||
|
time.sleep(self.auto_refresh_interval)
|
||||||
|
|
||||||
|
# Check if there are active workers
|
||||||
|
active = self.get_active_workers()
|
||||||
|
|
||||||
|
if not active:
|
||||||
|
# No more active workers, stop refreshing
|
||||||
|
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
|
||||||
|
break
|
||||||
|
|
||||||
|
# Call all registered callbacks with the active workers
|
||||||
|
with self._lock:
|
||||||
|
for callback in self.refresh_callbacks:
|
||||||
|
try:
|
||||||
|
callback(active)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
|
||||||
|
finally:
|
||||||
|
logger.debug("[WorkerManager] Auto-refresh loop ended")
|
||||||
|
|
||||||
|
def cleanup_old_workers(self, days: int = 7) -> int:
|
||||||
|
"""Clean up completed/errored workers older than specified days.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
days: Delete workers completed more than this many days ago
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of workers deleted
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
count = self.db.cleanup_old_workers(days)
|
||||||
|
if count > 0:
|
||||||
|
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
|
||||||
|
return count
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||||
|
"""Append text to a worker's stdout log.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
text: Text to append
|
||||||
|
channel: Logical channel (stdout, stderr, log, etc.)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if append was successful
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
step_label = self._get_last_step(worker_id)
|
||||||
|
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_stdout(self, worker_id: str) -> str:
|
||||||
|
"""Get stdout logs for a worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Worker's stdout or empty string
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.get_worker_stdout(worker_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||||
|
"""Compatibility wrapper for append_stdout."""
|
||||||
|
return self.append_stdout(worker_id, text, channel=channel)
|
||||||
|
|
||||||
|
def clear_stdout(self, worker_id: str) -> bool:
|
||||||
|
"""Clear stdout logs for a worker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
worker_id: Unique identifier for the worker
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if clear was successful
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return self.db.clear_worker_stdout(worker_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Close the worker manager and database connection."""
|
||||||
|
self.stop_auto_refresh()
|
||||||
|
self.db.close()
|
||||||
|
logger.info("[WorkerManager] Closed")
|
||||||
425
hydrus_health_check.py
Normal file
425
hydrus_health_check.py
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
"""Hydrus API health check and initialization.
|
||||||
|
|
||||||
|
Provides startup health checks for Hydrus API availability and gracefully
|
||||||
|
disables Hydrus features if the API is unavailable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from helper.logger import log
|
||||||
|
from typing import Tuple, Optional, Dict, Any
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Global state for Hydrus availability
|
||||||
|
_HYDRUS_AVAILABLE: Optional[bool] = None
|
||||||
|
_HYDRUS_UNAVAILABLE_REASON: Optional[str] = None
|
||||||
|
_HYDRUS_CHECK_COMPLETE = False
|
||||||
|
|
||||||
|
# Global state for Debrid availability
|
||||||
|
_DEBRID_AVAILABLE: Optional[bool] = None
|
||||||
|
_DEBRID_UNAVAILABLE_REASON: Optional[str] = None
|
||||||
|
_DEBRID_CHECK_COMPLETE = False
|
||||||
|
|
||||||
|
# Global state for MPV availability
|
||||||
|
_MPV_AVAILABLE: Optional[bool] = None
|
||||||
|
_MPV_UNAVAILABLE_REASON: Optional[str] = None
|
||||||
|
_MPV_CHECK_COMPLETE = False
|
||||||
|
|
||||||
|
|
||||||
|
def check_hydrus_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
|
||||||
|
"""Check if Hydrus API is available by pinging it.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Application configuration dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_available: bool, reason: Optional[str])
|
||||||
|
- (True, None) if Hydrus is available
|
||||||
|
- (False, reason) if Hydrus is unavailable with reason
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from helper.hydrus import is_available as _is_hydrus_available
|
||||||
|
|
||||||
|
logger.info("[Hydrus Health Check] Pinging Hydrus API...")
|
||||||
|
is_available, reason = _is_hydrus_available(config, use_cache=False)
|
||||||
|
|
||||||
|
if is_available:
|
||||||
|
logger.info("[Hydrus Health Check] ✅ Hydrus API is AVAILABLE")
|
||||||
|
return True, None
|
||||||
|
else:
|
||||||
|
reason_str = f": {reason}" if reason else ""
|
||||||
|
logger.warning(f"[Hydrus Health Check] ❌ Hydrus API is UNAVAILABLE{reason_str}")
|
||||||
|
return False, reason
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = str(e)
|
||||||
|
logger.error(f"[Hydrus Health Check] ❌ Error checking Hydrus availability: {error_msg}")
|
||||||
|
return False, error_msg
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_hydrus_health_check(config: Dict[str, Any]) -> None:
|
||||||
|
"""Initialize Hydrus health check at startup.
|
||||||
|
|
||||||
|
This should be called once at application startup to determine if Hydrus
|
||||||
|
features should be enabled or disabled.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Application configuration dictionary
|
||||||
|
"""
|
||||||
|
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON, _HYDRUS_CHECK_COMPLETE
|
||||||
|
|
||||||
|
logger.info("[Startup] Starting Hydrus health check...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
is_available, reason = check_hydrus_availability(config)
|
||||||
|
_HYDRUS_AVAILABLE = is_available
|
||||||
|
_HYDRUS_UNAVAILABLE_REASON = reason
|
||||||
|
_HYDRUS_CHECK_COMPLETE = True
|
||||||
|
|
||||||
|
if is_available:
|
||||||
|
log("✅ Hydrus: ENABLED - All Hydrus features available", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
log(f"⚠️ Hydrus: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
|
||||||
|
log("- Export functionality disabled", file=sys.stderr)
|
||||||
|
log("- Hydrus library features disabled", file=sys.stderr)
|
||||||
|
log("- Hydrus tag operations disabled", file=sys.stderr)
|
||||||
|
log("→ Local storage and All-Debrid features still available", file=sys.stderr)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Startup] Failed to initialize Hydrus health check: {e}", exc_info=True)
|
||||||
|
_HYDRUS_AVAILABLE = False
|
||||||
|
_HYDRUS_UNAVAILABLE_REASON = str(e)
|
||||||
|
_HYDRUS_CHECK_COMPLETE = True
|
||||||
|
log(f"⚠️ Hydrus: DISABLED - Error during health check: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def check_debrid_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
|
||||||
|
"""Check if Debrid API is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Application configuration dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_available: bool, reason: Optional[str])
|
||||||
|
- (True, None) if Debrid API is available
|
||||||
|
- (False, reason) if Debrid API is unavailable with reason
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from helper.http_client import HTTPClient
|
||||||
|
|
||||||
|
logger.info("[Debrid Health Check] Pinging Debrid API at https://api.alldebrid.com/v4/ping...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use the public ping endpoint to check API availability
|
||||||
|
# This endpoint doesn't require authentication
|
||||||
|
with HTTPClient(timeout=10.0, verify_ssl=True) as client:
|
||||||
|
response = client.get('https://api.alldebrid.com/v4/ping')
|
||||||
|
logger.debug(f"[Debrid Health Check] Response status: {response.status_code}")
|
||||||
|
|
||||||
|
# Read response text first (handles gzip decompression)
|
||||||
|
try:
|
||||||
|
response_text = response.text
|
||||||
|
logger.debug(f"[Debrid Health Check] Response text: {response_text}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Debrid Health Check] ❌ Failed to read response text: {e}")
|
||||||
|
return False, f"Failed to read response: {e}"
|
||||||
|
|
||||||
|
# Parse JSON
|
||||||
|
try:
|
||||||
|
result = response.json()
|
||||||
|
logger.debug(f"[Debrid Health Check] Response JSON: {result}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Debrid Health Check] ❌ Failed to parse JSON: {e}")
|
||||||
|
logger.error(f"[Debrid Health Check] Response was: {response_text}")
|
||||||
|
return False, f"Failed to parse response: {e}"
|
||||||
|
|
||||||
|
# Validate response format
|
||||||
|
if result.get('status') == 'success' and result.get('data', {}).get('ping') == 'pong':
|
||||||
|
logger.info("[Debrid Health Check] ✅ Debrid API is AVAILABLE")
|
||||||
|
return True, None
|
||||||
|
else:
|
||||||
|
logger.warning(f"[Debrid Health Check] ❌ Debrid API returned unexpected response: {result}")
|
||||||
|
return False, "Invalid API response"
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = str(e)
|
||||||
|
logger.warning(f"[Debrid Health Check] ❌ Debrid API error: {error_msg}")
|
||||||
|
import traceback
|
||||||
|
logger.debug(f"[Debrid Health Check] Traceback: {traceback.format_exc()}")
|
||||||
|
return False, error_msg
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = str(e)
|
||||||
|
logger.error(f"[Debrid Health Check] ❌ Error checking Debrid availability: {error_msg}")
|
||||||
|
return False, error_msg
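# Illustrative sketch: the ping endpoint above is expected to answer
# {"status": "success", "data": {"ping": "pong"}}. A minimal standalone check
# using only the standard library, assuming the same endpoint and response
# shape, could look like this (the function name is illustrative only):
def _debrid_ping_ok(timeout: float = 10.0) -> bool:
    import json
    import urllib.request
    try:
        with urllib.request.urlopen("https://api.alldebrid.com/v4/ping", timeout=timeout) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except Exception:
        return False
    return payload.get("status") == "success" and payload.get("data", {}).get("ping") == "pong"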
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_debrid_health_check(config: Dict[str, Any]) -> None:
|
||||||
|
"""Initialize Debrid health check at startup.
|
||||||
|
|
||||||
|
This should be called once at application startup to determine if Debrid
|
||||||
|
features should be enabled or disabled.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Application configuration dictionary
|
||||||
|
"""
|
||||||
|
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON, _DEBRID_CHECK_COMPLETE
|
||||||
|
|
||||||
|
logger.info("[Startup] Starting Debrid health check...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
is_available, reason = check_debrid_availability(config)
|
||||||
|
_DEBRID_AVAILABLE = is_available
|
||||||
|
_DEBRID_UNAVAILABLE_REASON = reason
|
||||||
|
_DEBRID_CHECK_COMPLETE = True
|
||||||
|
|
||||||
|
if is_available:
|
||||||
|
log("✅ Debrid: ENABLED - All Debrid features available", file=sys.stderr)
|
||||||
|
logger.info("[Startup] Debrid health check PASSED")
|
||||||
|
else:
|
||||||
|
log(f"⚠️ Debrid: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
|
||||||
|
log("- Debrid export disabled", file=sys.stderr)
|
||||||
|
log("- Debrid library features disabled", file=sys.stderr)
|
||||||
|
log("→ Local storage and Hydrus features still available", file=sys.stderr)
|
||||||
|
logger.warning(f"[Startup] Debrid health check FAILED: {reason}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Startup] Failed to initialize Debrid health check: {e}", exc_info=True)
|
||||||
|
_DEBRID_AVAILABLE = False
|
||||||
|
_DEBRID_UNAVAILABLE_REASON = str(e)
|
||||||
|
_DEBRID_CHECK_COMPLETE = True
|
||||||
|
log(f"⚠️ Debrid: DISABLED - Error during health check: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def check_mpv_availability() -> Tuple[bool, Optional[str]]:
|
||||||
|
"""Check if MPV is available (installed and runnable).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (is_available: bool, reason: Optional[str])
|
||||||
|
"""
|
||||||
|
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE
|
||||||
|
|
||||||
|
if _MPV_CHECK_COMPLETE and _MPV_AVAILABLE is not None:
|
||||||
|
return _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
logger.info("[MPV Health Check] Checking for MPV executable...")
|
||||||
|
|
||||||
|
mpv_path = shutil.which("mpv")
|
||||||
|
if not mpv_path:
|
||||||
|
_MPV_AVAILABLE = False
|
||||||
|
_MPV_UNAVAILABLE_REASON = "Executable 'mpv' not found in PATH"
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
|
||||||
|
return False, _MPV_UNAVAILABLE_REASON
|
||||||
|
|
||||||
|
# Try to get version to confirm it works
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
[mpv_path, "--version"],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=2
|
||||||
|
)
|
||||||
|
if result.returncode == 0:
|
||||||
|
version_line = result.stdout.split('\n')[0]
|
||||||
|
_MPV_AVAILABLE = True
|
||||||
|
_MPV_UNAVAILABLE_REASON = None
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
logger.info(f"[MPV Health Check] ✅ MPV is AVAILABLE ({version_line})")
|
||||||
|
return True, None
|
||||||
|
else:
|
||||||
|
_MPV_AVAILABLE = False
|
||||||
|
_MPV_UNAVAILABLE_REASON = f"MPV returned non-zero exit code: {result.returncode}"
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
|
||||||
|
return False, _MPV_UNAVAILABLE_REASON
|
||||||
|
except Exception as e:
|
||||||
|
_MPV_AVAILABLE = False
|
||||||
|
_MPV_UNAVAILABLE_REASON = f"Error running MPV: {e}"
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}")
|
||||||
|
return False, _MPV_UNAVAILABLE_REASON
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_mpv_health_check() -> None:
|
||||||
|
"""Initialize MPV health check at startup.
|
||||||
|
|
||||||
|
This should be called once at application startup to determine if MPV
|
||||||
|
features should be enabled or disabled.
|
||||||
|
"""
|
||||||
|
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE
|
||||||
|
|
||||||
|
logger.info("[Startup] Starting MPV health check...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
is_available, reason = check_mpv_availability()
|
||||||
|
_MPV_AVAILABLE = is_available
|
||||||
|
_MPV_UNAVAILABLE_REASON = reason
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
|
||||||
|
if is_available:
|
||||||
|
log("✅ MPV: ENABLED - All MPV features available", file=sys.stderr)
|
||||||
|
logger.info("[Startup] MPV health check PASSED")
|
||||||
|
else:
|
||||||
|
log(f"⚠️ MPV: DISABLED - {reason or 'Connection failed'}", file=sys.stderr)
|
||||||
|
log("→ Hydrus features still available", file=sys.stderr)
|
||||||
|
logger.warning(f"[Startup] MPV health check FAILED: {reason}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Startup] Failed to initialize MPV health check: {e}", exc_info=True)
|
||||||
|
_MPV_AVAILABLE = False
|
||||||
|
_MPV_UNAVAILABLE_REASON = str(e)
|
||||||
|
_MPV_CHECK_COMPLETE = True
|
||||||
|
log(f"⚠️ MPV: DISABLED - Error during health check: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def is_hydrus_available() -> bool:
|
||||||
|
"""Check if Hydrus is available (from cached health check).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if Hydrus API is available, False otherwise
|
||||||
|
"""
|
||||||
|
return _HYDRUS_AVAILABLE is True
|
||||||
|
|
||||||
|
|
||||||
|
def get_hydrus_unavailable_reason() -> Optional[str]:
|
||||||
|
"""Get the reason why Hydrus is unavailable.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
String explaining why Hydrus is unavailable, or None if available
|
||||||
|
"""
|
||||||
|
return _HYDRUS_UNAVAILABLE_REASON if not is_hydrus_available() else None
|
||||||
|
|
||||||
|
|
||||||
|
def is_hydrus_check_complete() -> bool:
|
||||||
|
"""Check if the Hydrus health check has been completed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if health check has run, False if still pending
|
||||||
|
"""
|
||||||
|
return _HYDRUS_CHECK_COMPLETE
|
||||||
|
|
||||||
|
|
||||||
|
def disable_hydrus_features() -> None:
|
||||||
|
"""Manually disable all Hydrus features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if Hydrus connectivity is lost after startup.
|
||||||
|
"""
|
||||||
|
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
|
||||||
|
_HYDRUS_AVAILABLE = False
|
||||||
|
_HYDRUS_UNAVAILABLE_REASON = "Manually disabled or lost connection"
|
||||||
|
logger.warning("[Hydrus] Features manually disabled")
|
||||||
|
|
||||||
|
|
||||||
|
def enable_hydrus_features() -> None:
|
||||||
|
"""Manually enable Hydrus features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if Hydrus connectivity is restored after startup.
|
||||||
|
"""
|
||||||
|
global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
|
||||||
|
_HYDRUS_AVAILABLE = True
|
||||||
|
_HYDRUS_UNAVAILABLE_REASON = None
|
||||||
|
logger.info("[Hydrus] Features manually enabled")
|
||||||
|
|
||||||
|
|
||||||
|
def is_debrid_available() -> bool:
|
||||||
|
"""Check if Debrid is available (from cached health check).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if Debrid API is available, False otherwise
|
||||||
|
"""
|
||||||
|
return _DEBRID_AVAILABLE is True
|
||||||
|
|
||||||
|
|
||||||
|
def get_debrid_unavailable_reason() -> Optional[str]:
|
||||||
|
"""Get the reason why Debrid is unavailable.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
String explaining why Debrid is unavailable, or None if available
|
||||||
|
"""
|
||||||
|
return _DEBRID_UNAVAILABLE_REASON if not is_debrid_available() else None
|
||||||
|
|
||||||
|
|
||||||
|
def is_debrid_check_complete() -> bool:
|
||||||
|
"""Check if the Debrid health check has been completed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if health check has run, False if still pending
|
||||||
|
"""
|
||||||
|
return _DEBRID_CHECK_COMPLETE
|
||||||
|
|
||||||
|
|
||||||
|
def disable_debrid_features() -> None:
|
||||||
|
"""Manually disable all Debrid features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if Debrid connectivity is lost after startup.
|
||||||
|
"""
|
||||||
|
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON
|
||||||
|
_DEBRID_AVAILABLE = False
|
||||||
|
_DEBRID_UNAVAILABLE_REASON = "Manually disabled or lost connection"
|
||||||
|
logger.warning("[Debrid] Features manually disabled")
|
||||||
|
|
||||||
|
|
||||||
|
def enable_debrid_features() -> None:
|
||||||
|
"""Manually enable Debrid features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if Debrid connectivity is restored after startup.
|
||||||
|
"""
|
||||||
|
global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON
|
||||||
|
_DEBRID_AVAILABLE = True
|
||||||
|
_DEBRID_UNAVAILABLE_REASON = None
|
||||||
|
logger.info("[Debrid] Features manually enabled")
|
||||||
|
|
||||||
|
|
||||||
|
def is_mpv_available() -> bool:
|
||||||
|
"""Check if MPV is available (from cached health check).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if MPV is available, False otherwise
|
||||||
|
"""
|
||||||
|
return _MPV_AVAILABLE is True
|
||||||
|
|
||||||
|
|
||||||
|
def get_mpv_unavailable_reason() -> Optional[str]:
|
||||||
|
"""Get the reason why MPV is unavailable.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
String explaining why MPV is unavailable, or None if available
|
||||||
|
"""
|
||||||
|
return _MPV_UNAVAILABLE_REASON if not is_mpv_available() else None
|
||||||
|
|
||||||
|
|
||||||
|
def is_mpv_check_complete() -> bool:
|
||||||
|
"""Check if the MPV health check has been completed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if health check has run, False if still pending
|
||||||
|
"""
|
||||||
|
return _MPV_CHECK_COMPLETE
|
||||||
|
|
||||||
|
|
||||||
|
def disable_mpv_features() -> None:
|
||||||
|
"""Manually disable all MPV features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if MPV becomes unavailable after startup.
|
||||||
|
"""
|
||||||
|
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
|
||||||
|
_MPV_AVAILABLE = False
|
||||||
|
_MPV_UNAVAILABLE_REASON = "Manually disabled or lost connection"
|
||||||
|
logger.warning("[MPV] Features manually disabled")
|
||||||
|
|
||||||
|
|
||||||
|
def enable_mpv_features() -> None:
|
||||||
|
"""Manually enable MPV features (for testing/fallback).
|
||||||
|
|
||||||
|
This can be called if MPV becomes available again after startup.
|
||||||
|
"""
|
||||||
|
global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON
|
||||||
|
_MPV_AVAILABLE = True
|
||||||
|
_MPV_UNAVAILABLE_REASON = None
|
||||||
|
logger.info("[MPV] Features manually enabled")
|
||||||
13
medeia_entry.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"""Entry point wrapper for Medeia-Macina CLI."""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the current directory to sys.path so we can import CLI
|
||||||
|
root_dir = Path(__file__).parent
|
||||||
|
if str(root_dir) not in sys.path:
|
||||||
|
sys.path.insert(0, str(root_dir))
|
||||||
|
|
||||||
|
from CLI import main
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
2
medeia_macina/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
"""Medeia-Macina package - Media management system."""
|
||||||
|
__version__ = "0.1.0"
|
||||||
13
medeia_macina/cli_entry.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"""Entry point wrapper for Medeia-Macina CLI."""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the parent directory to sys.path so we can import CLI
|
||||||
|
root_dir = Path(__file__).parent.parent
|
||||||
|
if str(root_dir) not in sys.path:
|
||||||
|
sys.path.insert(0, str(root_dir))
|
||||||
|
|
||||||
|
from CLI import main
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
3199
metadata.py
Normal file
File diff suppressed because it is too large
678
models.py
Normal file
@@ -0,0 +1,678 @@
|
|||||||
|
"""Data models for the pipeline."""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class PipeObject:
|
||||||
|
"""Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline.
|
||||||
|
|
||||||
|
This is the single source of truth for all result data in the pipeline. It can represent:
|
||||||
|
- Tag extraction results (IMDb, MusicBrainz, OpenLibrary lookups)
|
||||||
|
- Remote metadata fetches
|
||||||
|
- File operations with metadata/tags and relationship tracking
|
||||||
|
- Search results
|
||||||
|
- Files with version relationships (king/alt/related)
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
source: Source of the object (e.g., 'imdb', 'musicbrainz', 'libgen', 'debrid', 'file', etc.)
|
||||||
|
identifier: Unique identifier from the source (e.g., IMDb ID, MBID, magnet hash, file hash)
|
||||||
|
tags: List of extracted or assigned tags
|
||||||
|
title: Human-readable title if applicable
|
||||||
|
source_url: URL where the object came from
|
||||||
|
duration: Duration in seconds if applicable
|
||||||
|
metadata: Full metadata dictionary from source
|
||||||
|
remote_metadata: Additional remote metadata
|
||||||
|
warnings: Any warnings or issues encountered
|
||||||
|
mpv_metadata: MPV-specific metadata if applicable
|
||||||
|
file_path: Path to the file if this object represents a file
|
||||||
|
file_hash: SHA-256 hash of the file for integrity and relationship tracking
|
||||||
|
king_hash: Hash of the primary/master version of this file (for alternates)
|
||||||
|
alt_hashes: List of hashes for alternate versions of this file
|
||||||
|
related_hashes: List of hashes for related files (e.g., screenshots, editions)
|
||||||
|
is_temp: If True, this is a temporary/intermediate artifact that may be cleaned up
|
||||||
|
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name', e.g., 'cmdlet:get-file')
|
||||||
|
parent_id: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
|
||||||
|
extra: Additional fields not covered above
|
||||||
|
"""
|
||||||
|
source: str
|
||||||
|
identifier: str
|
||||||
|
tags: List[str] = field(default_factory=list)
|
||||||
|
title: Optional[str] = None
|
||||||
|
source_url: Optional[str] = None
|
||||||
|
duration: Optional[float] = None
|
||||||
|
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
remote_metadata: Optional[Dict[str, Any]] = None
|
||||||
|
warnings: List[str] = field(default_factory=list)
|
||||||
|
mpv_metadata: Optional[Dict[str, Any]] = None
|
||||||
|
file_path: Optional[str] = None
|
||||||
|
file_hash: Optional[str] = None
|
||||||
|
king_hash: Optional[str] = None
|
||||||
|
alt_hashes: List[str] = field(default_factory=list)
|
||||||
|
related_hashes: List[str] = field(default_factory=list)
|
||||||
|
is_temp: bool = False
|
||||||
|
action: Optional[str] = None
|
||||||
|
parent_id: Optional[str] = None
|
||||||
|
extra: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def register_as_king(self, file_hash: str) -> None:
|
||||||
|
"""Register this object as the king (primary) version of a file."""
|
||||||
|
self.king_hash = file_hash
|
||||||
|
|
||||||
|
def add_alternate(self, alt_hash: str) -> None:
|
||||||
|
"""Add an alternate version hash for this file."""
|
||||||
|
if alt_hash not in self.alt_hashes:
|
||||||
|
self.alt_hashes.append(alt_hash)
|
||||||
|
|
||||||
|
def add_related(self, related_hash: str) -> None:
|
||||||
|
"""Add a related file hash (e.g., screenshot, edition)."""
|
||||||
|
if related_hash not in self.related_hashes:
|
||||||
|
self.related_hashes.append(related_hash)
|
||||||
|
|
||||||
|
def get_relationships(self) -> Dict[str, Any]:
|
||||||
|
"""Get all relationships for this object."""
|
||||||
|
rels = {}
|
||||||
|
if self.king_hash:
|
||||||
|
rels["king"] = self.king_hash
|
||||||
|
if self.alt_hashes:
|
||||||
|
rels["alt"] = self.alt_hashes
|
||||||
|
if self.related_hashes:
|
||||||
|
rels["related"] = self.related_hashes
|
||||||
|
return rels
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
"""Serialize to dictionary, excluding None and empty values."""
|
||||||
|
data: Dict[str, Any] = {
|
||||||
|
"source": self.source,
|
||||||
|
"tags": self.tags,
|
||||||
|
}
|
||||||
|
if self.identifier:
|
||||||
|
data["id"] = self.identifier
|
||||||
|
if self.title:
|
||||||
|
data["title"] = self.title
|
||||||
|
if self.source_url:
|
||||||
|
data["source_url"] = self.source_url
|
||||||
|
if self.duration is not None:
|
||||||
|
data["duration"] = self.duration
|
||||||
|
if self.metadata:
|
||||||
|
data["metadata"] = self.metadata
|
||||||
|
if self.remote_metadata is not None:
|
||||||
|
data["remote_metadata"] = self.remote_metadata
|
||||||
|
if self.mpv_metadata is not None:
|
||||||
|
data["mpv_metadata"] = self.mpv_metadata
|
||||||
|
if self.warnings:
|
||||||
|
data["warnings"] = self.warnings
|
||||||
|
if self.file_path:
|
||||||
|
data["file_path"] = self.file_path
|
||||||
|
if self.file_hash:
|
||||||
|
data["file_hash"] = self.file_hash
|
||||||
|
# Include pipeline chain tracking fields
|
||||||
|
if self.is_temp:
|
||||||
|
data["is_temp"] = self.is_temp
|
||||||
|
if self.action:
|
||||||
|
data["action"] = self.action
|
||||||
|
if self.parent_id:
|
||||||
|
data["parent_id"] = self.parent_id
|
||||||
|
# Include relationship data if present
|
||||||
|
rels = self.get_relationships()
|
||||||
|
if rels:
|
||||||
|
data["relationships"] = rels
|
||||||
|
data.update({k: v for k, v in self.extra.items() if v is not None})
|
||||||
|
return data
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hash(self) -> str:
|
||||||
|
"""Compute SHA-256 hash from source and identifier."""
|
||||||
|
base = f"{self.source}:{self.identifier}"
|
||||||
|
return hashlib.sha256(base.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
# Backwards compatibility aliases
|
||||||
|
def as_dict(self) -> Dict[str, Any]:
|
||||||
|
"""Alias for to_dict() for backwards compatibility."""
|
||||||
|
return self.to_dict()
|
||||||
|
|
||||||
|
def to_serializable(self) -> Dict[str, Any]:
|
||||||
|
"""Alias for to_dict() for backwards compatibility."""
|
||||||
|
return self.to_dict()
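# Illustrative sketch: constructing a PipeObject for a downloaded file and
# recording a king/alt relationship. Hash values and paths are placeholders.
def _example_pipeobject_usage() -> None:
    obj = PipeObject(
        source="file",
        identifier="example-download",
        title="Example Title",
        file_path="/tmp/example.mkv",
        file_hash="aaaa1111",  # placeholder for a real SHA-256
        action="cmdlet:get-file",
    )
    obj.register_as_king("aaaa1111")
    obj.add_alternate("bbbb2222")
    payload = obj.to_dict()  # includes a "relationships" entry with king/alt hashes
    assert payload["relationships"]["king"] == "aaaa1111"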
|
||||||
|
|
||||||
|
|
||||||
|
class FileRelationshipTracker:
|
||||||
|
"""Track relationships between files for sidecar creation.
|
||||||
|
|
||||||
|
Allows tagging files with their relationships to other files:
|
||||||
|
- king: The primary/master version of a file
|
||||||
|
- alt: Alternate versions of the same content
|
||||||
|
- related: Related files (e.g., screenshots of a book)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.relationships: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
|
def register_king(self, file_path: str, file_hash: str) -> None:
|
||||||
|
"""Register a file as the king (primary) version."""
|
||||||
|
if file_path not in self.relationships:
|
||||||
|
self.relationships[file_path] = {}
|
||||||
|
self.relationships[file_path]["king"] = file_hash
|
||||||
|
|
||||||
|
def add_alt(self, file_path: str, alt_hash: str) -> None:
|
||||||
|
"""Add an alternate version of a file."""
|
||||||
|
if file_path not in self.relationships:
|
||||||
|
self.relationships[file_path] = {}
|
||||||
|
if "alt" not in self.relationships[file_path]:
|
||||||
|
self.relationships[file_path]["alt"] = []
|
||||||
|
if alt_hash not in self.relationships[file_path]["alt"]:
|
||||||
|
self.relationships[file_path]["alt"].append(alt_hash)
|
||||||
|
|
||||||
|
def add_related(self, file_path: str, related_hash: str) -> None:
|
||||||
|
"""Add a related file."""
|
||||||
|
if file_path not in self.relationships:
|
||||||
|
self.relationships[file_path] = {}
|
||||||
|
if "related" not in self.relationships[file_path]:
|
||||||
|
self.relationships[file_path]["related"] = []
|
||||||
|
if related_hash not in self.relationships[file_path]["related"]:
|
||||||
|
self.relationships[file_path]["related"].append(related_hash)
|
||||||
|
|
||||||
|
def get_relationships(self, file_path: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Get relationships for a file."""
|
||||||
|
return self.relationships.get(file_path)
|
||||||
|
|
||||||
|
def link_files(self, primary_path: str, king_hash: str, *alt_paths: str) -> None:
|
||||||
|
"""Link files together with primary as king and others as alternates.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
primary_path: Path to the primary file (will be marked as 'king')
|
||||||
|
king_hash: Hash of the primary file
|
||||||
|
alt_paths: Paths to alternate versions (will be marked as 'alt')
|
||||||
|
"""
|
||||||
|
self.register_king(primary_path, king_hash)
|
||||||
|
for alt_path in alt_paths:
|
||||||
|
try:
|
||||||
|
alt_hash = _get_file_hash(alt_path)
|
||||||
|
self.add_alt(primary_path, alt_hash)
|
||||||
|
except Exception as e:
|
||||||
|
import sys
|
||||||
|
print(f"Error hashing {alt_path}: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_file_hash(filepath: str) -> str:
|
||||||
|
"""Calculate SHA256 hash of a file."""
|
||||||
|
sha256_hash = hashlib.sha256()
|
||||||
|
with open(filepath, "rb") as f:
|
||||||
|
for byte_block in iter(lambda: f.read(4096), b""):
|
||||||
|
sha256_hash.update(byte_block)
|
||||||
|
return sha256_hash.hexdigest()
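# Illustrative sketch: linking a primary file to an alternate version. The paths
# are hypothetical; link_files() hashes each alternate with _get_file_hash() and
# records it under the primary path, so the files must exist on disk.
def _example_relationship_tracking() -> None:
    tracker = FileRelationshipTracker()
    primary = "/library/movie.mkv"           # hypothetical path
    alternate = "/library/movie.720p.mkv"    # hypothetical path
    tracker.link_files(primary, _get_file_hash(primary), alternate)
    rels = tracker.get_relationships(primary)  # {"king": "...", "alt": ["..."]}
    assert rels is not None and "king" in rels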
|
||||||
|
|
||||||
|
|
||||||
|
# ============= Download Module Classes =============
|
||||||
|
|
||||||
|
class DownloadError(RuntimeError):
|
||||||
|
"""Raised when the download or Hydrus import fails."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DownloadOptions:
|
||||||
|
"""Configuration for downloading media.
|
||||||
|
|
||||||
|
Use the add-file cmdlet separately for Hydrus import.
|
||||||
|
"""
|
||||||
|
url: str
|
||||||
|
mode: str # "audio" or "video"
|
||||||
|
output_dir: Path
|
||||||
|
cookies_path: Optional[Path] = None
|
||||||
|
ytdl_format: Optional[str] = None
|
||||||
|
extra_tags: Optional[List[str]] = None
|
||||||
|
debug_log: Optional[Path] = None
|
||||||
|
native_progress: bool = False
|
||||||
|
clip_sections: Optional[str] = None
|
||||||
|
playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8")
|
||||||
|
no_playlist: bool = False # If True, pass --no-playlist to yt-dlp
|
||||||
|
|
||||||
|
|
||||||
|
class SendFunc(Protocol):
|
||||||
|
"""Protocol for event sender function."""
|
||||||
|
def __call__(self, event: str, **payload: Any) -> None:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DownloadMediaResult:
|
||||||
|
"""Result of a successful media download."""
|
||||||
|
path: Path
|
||||||
|
info: Dict[str, Any]
|
||||||
|
tags: List[str]
|
||||||
|
source_url: Optional[str]
|
||||||
|
hash_value: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DebugLogger:
|
||||||
|
"""Logs events to a JSON debug file for troubleshooting downloads."""
|
||||||
|
path: Path
|
||||||
|
file: Optional[TextIO] = None
|
||||||
|
session_started: bool = False
|
||||||
|
|
||||||
|
def ensure_open(self) -> None:
|
||||||
|
"""Open the debug log file if not already open."""
|
||||||
|
if self.file is not None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
parent = self.path.parent
|
||||||
|
if parent and not parent.exists():
|
||||||
|
parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.file = self.path.open("a", encoding="utf-8")
|
||||||
|
except OSError as exc: # pragma: no cover - surfaces to stderr
|
||||||
|
print(f"Failed to open debug log {self.path}: {exc}", file=sys.stderr)
|
||||||
|
self.file = None
|
||||||
|
return
|
||||||
|
self._write_session_header()
|
||||||
|
|
||||||
|
def _write_session_header(self) -> None:
|
||||||
|
"""Write session start marker to log."""
|
||||||
|
if self.session_started:
|
||||||
|
return
|
||||||
|
self.session_started = True
|
||||||
|
self.write_record("session-start", {"pid": os.getpid(), "exe": sys.executable})
|
||||||
|
|
||||||
|
def write_raw(self, text: str) -> None:
|
||||||
|
"""Write raw text to debug log."""
|
||||||
|
self.ensure_open()
|
||||||
|
if self.file is None:
|
||||||
|
return
|
||||||
|
self.file.write(text + "\n")
|
||||||
|
self.file.flush()
|
||||||
|
|
||||||
|
def write_record(self, event: str, payload: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
"""Write a structured event record to debug log."""
|
||||||
|
record = {
|
||||||
|
"timestamp": datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z",
|
||||||
|
"event": event,
|
||||||
|
"payload": payload,
|
||||||
|
}
|
||||||
|
self.write_raw(json.dumps(_sanitise_for_json(record), ensure_ascii=False))
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Close the debug log file."""
|
||||||
|
if self.file is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
self.file.close()
|
||||||
|
finally:
|
||||||
|
self.file = None
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[int]] = None) -> Any:
|
||||||
|
"""Best-effort conversion to JSON-serialisable types without raising on cycles."""
|
||||||
|
import math
|
||||||
|
from dataclasses import asdict, is_dataclass
|
||||||
|
|
||||||
|
if value is None or isinstance(value, (str, bool)):
|
||||||
|
return value
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
if isinstance(value, float) and not math.isfinite(value):
|
||||||
|
return repr(value)
|
||||||
|
return value
|
||||||
|
if isinstance(value, Path):
|
||||||
|
return str(value)
|
||||||
|
if isinstance(value, bytes):
|
||||||
|
try:
|
||||||
|
return value.decode()
|
||||||
|
except Exception:
|
||||||
|
return value.hex()
|
||||||
|
|
||||||
|
if max_depth <= 0:
|
||||||
|
return repr(value)
|
||||||
|
|
||||||
|
if _seen is None:
|
||||||
|
_seen = set()
|
||||||
|
|
||||||
|
obj_id = id(value)
|
||||||
|
if obj_id in _seen:
|
||||||
|
return "<circular>"
|
||||||
|
|
||||||
|
_seen.add(obj_id)
|
||||||
|
try:
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return {
|
||||||
|
str(key): _sanitise_for_json(val, max_depth=max_depth - 1, _seen=_seen)
|
||||||
|
for key, val in value.items()
|
||||||
|
}
|
||||||
|
if isinstance(value, (list, tuple, set)):
|
||||||
|
iterable = value if not isinstance(value, set) else list(value)
|
||||||
|
return [
|
||||||
|
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen)
|
||||||
|
for item in iterable
|
||||||
|
]
|
||||||
|
if is_dataclass(value) and not isinstance(value, type):
|
||||||
|
return _sanitise_for_json(asdict(value), max_depth=max_depth - 1, _seen=_seen)
|
||||||
|
finally:
|
||||||
|
_seen.discard(obj_id)
|
||||||
|
|
||||||
|
return repr(value)
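# Illustrative sketch: writing structured download events to a debug log. The
# log path is a placeholder; write_record() routes values through
# _sanitise_for_json(), so Paths, bytes, dataclasses, and cycles are handled.
def _example_debug_logging() -> None:
    dbg = DebugLogger(path=Path("/tmp/download-debug.log"))
    dbg.write_record("download-start", {"url": "https://example.com/video", "dest": Path("/tmp")})
    dbg.write_record("download-complete", {"bytes": 1_048_576})
    dbg.close()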
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PROGRESS BAR CLASS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
class ProgressBar:
|
||||||
|
"""Formats download progress with visual bar, speed, ETA, and file size."""
|
||||||
|
|
||||||
|
def __init__(self, width: Optional[int] = None):
|
||||||
|
"""Initialize progress bar with optional custom width.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
width: Terminal width, defaults to auto-detect.
|
||||||
|
"""
|
||||||
|
if width is None:
|
||||||
|
width = shutil.get_terminal_size((80, 20))[0]
|
||||||
|
self.width = max(40, width) # Minimum 40 chars for readability
|
||||||
|
|
||||||
|
def format_bytes(self, bytes_val: Optional[float]) -> str:
|
||||||
|
"""Format bytes to human-readable size.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
bytes_val: Number of bytes or None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted string (e.g., "123.4 MB", "1.2 GB").
|
||||||
|
"""
|
||||||
|
if bytes_val is None or bytes_val <= 0:
|
||||||
|
return "?.? B"
|
||||||
|
|
||||||
|
for unit in ("B", "KB", "MB", "GB", "TB"):
|
||||||
|
if bytes_val < 1024:
|
||||||
|
return f"{bytes_val:.1f} {unit}"
|
||||||
|
bytes_val /= 1024
|
||||||
|
|
||||||
|
return f"{bytes_val:.1f} PB"
|
||||||
|
|
||||||
|
def format_speed(self, speed_str: Optional[str]) -> str:
|
||||||
|
"""Format download speed.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
speed_str: Speed string from yt-dlp (e.g., "1.23MiB/s").
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted speed string or "?.? KB/s".
|
||||||
|
"""
|
||||||
|
if not speed_str or speed_str.strip() == "":
|
||||||
|
return "?.? KB/s"
|
||||||
|
return speed_str.strip()
|
||||||
|
|
||||||
|
def format_eta(self, eta_str: Optional[str]) -> str:
|
||||||
|
"""Format estimated time remaining.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
eta_str: ETA string from yt-dlp (e.g., "00:12:34").
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted ETA string or "?:?:?".
|
||||||
|
"""
|
||||||
|
if not eta_str or eta_str.strip() == "":
|
||||||
|
return "?:?:?"
|
||||||
|
return eta_str.strip()
|
||||||
|
|
||||||
|
def format_percent(self, percent_str: Optional[str]) -> float:
|
||||||
|
"""Extract percent as float.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
percent_str: Percent string from yt-dlp (e.g., "45.2%").
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Float 0-100 or 0 if invalid.
|
||||||
|
"""
|
||||||
|
if not percent_str:
|
||||||
|
return 0.0
|
||||||
|
try:
|
||||||
|
return float(percent_str.replace("%", "").strip())
|
||||||
|
except ValueError:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
def build_bar(self, percent: float, width: int = 30) -> str:
|
||||||
|
"""Build ASCII progress bar.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
percent: Completion percentage (0-100).
|
||||||
|
width: Bar width in characters.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Progress bar string (e.g., "[████████░░░░░░░░░░░░░░░░░░]").
|
||||||
|
"""
|
||||||
|
percent = max(0, min(100, percent)) # Clamp to 0-100
|
||||||
|
filled = int(percent * width / 100)
|
||||||
|
empty = width - filled
|
||||||
|
|
||||||
|
# Use box-drawing characters for nice appearance
|
||||||
|
bar = "█" * filled + "░" * empty
|
||||||
|
return f"[{bar}]"
|
||||||
|
|
||||||
|
def format_progress(
|
||||||
|
self,
|
||||||
|
percent_str: Optional[str] = None,
|
||||||
|
downloaded: Optional[int] = None,
|
||||||
|
total: Optional[int] = None,
|
||||||
|
speed_str: Optional[str] = None,
|
||||||
|
eta_str: Optional[str] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Format complete progress line.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
percent_str: Percent string (e.g., "45.2%").
|
||||||
|
downloaded: Downloaded bytes.
|
||||||
|
total: Total bytes.
|
||||||
|
speed_str: Speed string (e.g., "1.23MiB/s").
|
||||||
|
eta_str: ETA string (e.g., "00:12:34").
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted progress string.
|
||||||
|
"""
|
||||||
|
percent = self.format_percent(percent_str)
|
||||||
|
bar = self.build_bar(percent)
|
||||||
|
|
||||||
|
# Format sizes
|
||||||
|
if downloaded is not None and total is not None and total > 0:
|
||||||
|
size_str = f"{self.format_bytes(downloaded)} / {self.format_bytes(total)}"
|
||||||
|
elif total is not None and total > 0:
|
||||||
|
size_str = f"/ {self.format_bytes(total)}"
|
||||||
|
elif downloaded is not None and downloaded > 0:
|
||||||
|
size_str = f"{self.format_bytes(downloaded)} downloaded"
|
||||||
|
else:
|
||||||
|
size_str = ""
|
||||||
|
|
||||||
|
speed = self.format_speed(speed_str)
|
||||||
|
eta = self.format_eta(eta_str)
|
||||||
|
|
||||||
|
# Build complete line
|
||||||
|
# Format: [████░░░░] 45.2% | 125.5 MB / 278.3 MB | 1.23 MB/s | ETA 00:12:34
|
||||||
|
parts = [
|
||||||
|
bar,
|
||||||
|
f"{percent:5.1f}%",
|
||||||
|
]
|
||||||
|
|
||||||
|
if size_str:
|
||||||
|
parts.append(f"| {size_str}")
|
||||||
|
|
||||||
|
parts.append(f"| {speed}")
|
||||||
|
parts.append(f"| ETA {eta}")
|
||||||
|
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
def format_summary(
|
||||||
|
self,
|
||||||
|
total: Optional[int] = None,
|
||||||
|
speed_str: Optional[str] = None,
|
||||||
|
elapsed_str: Optional[str] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Format completion summary.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
total: Total bytes downloaded.
|
||||||
|
speed_str: Average speed.
|
||||||
|
elapsed_str: Total time elapsed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Summary string.
|
||||||
|
"""
|
||||||
|
parts = ["✓ Download complete"]
|
||||||
|
|
||||||
|
if total is not None and total > 0:
|
||||||
|
parts.append(f"| {self.format_bytes(total)}")
|
||||||
|
|
||||||
|
if speed_str:
|
||||||
|
parts.append(f"| {speed_str.strip()}")
|
||||||
|
|
||||||
|
if elapsed_str:
|
||||||
|
parts.append(f"| {elapsed_str.strip()}")
|
||||||
|
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PIPELINE EXECUTION CONTEXT
|
||||||
|
# Consolidated from pipeline_context.py
|
||||||
|
# ============================================================================
|
||||||
|
# Note: Pipeline functions and state variables moved to pipeline.py
|
||||||
|
|
||||||
|
class PipelineStageContext:
|
||||||
|
"""Context information for the current pipeline stage."""
|
||||||
|
|
||||||
|
def __init__(self, stage_index: int, total_stages: int):
|
||||||
|
self.stage_index = stage_index
|
||||||
|
self.total_stages = total_stages
|
||||||
|
self.is_last_stage = (stage_index == total_stages - 1)
|
||||||
|
self.emits: List[Any] = []
|
||||||
|
|
||||||
|
def emit(self, obj: Any) -> None:
|
||||||
|
"""Emit an object to the next pipeline stage."""
|
||||||
|
self.emits.append(obj)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage})"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# RESULT TABLE CLASSES
|
||||||
|
# Consolidated from result_table.py
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class InputOption:
|
||||||
|
"""Represents an interactive input option (cmdlet argument) in a table.
|
||||||
|
|
||||||
|
Allows users to select options that translate to cmdlet arguments,
|
||||||
|
enabling interactive configuration right from the result table.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
# Create an option for location selection
|
||||||
|
location_opt = InputOption(
|
||||||
|
"location",
|
||||||
|
type="enum",
|
||||||
|
choices=["local", "hydrus", "0x0"],
|
||||||
|
description="Download destination"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use in result table
|
||||||
|
table.add_input_option(location_opt)
|
||||||
|
selected = table.select_option("location") # Returns user choice
|
||||||
|
"""
|
||||||
|
name: str
|
||||||
|
"""Option name (maps to cmdlet argument)"""
|
||||||
|
type: str = "string"
|
||||||
|
"""Option type: 'string', 'enum', 'flag', 'integer'"""
|
||||||
|
choices: List[str] = field(default_factory=list)
|
||||||
|
"""Valid choices for enum type"""
|
||||||
|
default: Optional[str] = None
|
||||||
|
"""Default value if not specified"""
|
||||||
|
description: str = ""
|
||||||
|
"""Description of what this option does"""
|
||||||
|
validator: Optional[Callable[[str], bool]] = None
|
||||||
|
"""Optional validator function: takes value, returns True if valid"""
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
"""Convert to dictionary."""
|
||||||
|
return {
|
||||||
|
"name": self.name,
|
||||||
|
"type": self.type,
|
||||||
|
"choices": self.choices if self.choices else None,
|
||||||
|
"default": self.default,
|
||||||
|
"description": self.description,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TUIResultCard:
|
||||||
|
"""Represents a result as a UI card with title, metadata, and actions.
|
||||||
|
|
||||||
|
Used in hub-ui and TUI contexts to render individual search results
|
||||||
|
as grouped components with visual structure.
|
||||||
|
"""
|
||||||
|
title: str
|
||||||
|
subtitle: Optional[str] = None
|
||||||
|
metadata: Optional[Dict[str, str]] = None
|
||||||
|
media_kind: Optional[str] = None
|
||||||
|
tags: Optional[List[str]] = None
|
||||||
|
file_hash: Optional[str] = None
|
||||||
|
file_size: Optional[str] = None
|
||||||
|
duration: Optional[str] = None
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
"""Initialize default values."""
|
||||||
|
if self.metadata is None:
|
||||||
|
self.metadata = {}
|
||||||
|
if self.tags is None:
|
||||||
|
self.tags = []
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ResultColumn:
|
||||||
|
"""Represents a single column in a result table."""
|
||||||
|
name: str
|
||||||
|
value: str
|
||||||
|
width: Optional[int] = None
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
"""String representation of the column."""
|
||||||
|
return f"{self.name}: {self.value}"
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, str]:
|
||||||
|
"""Convert to dictionary."""
|
||||||
|
return {"name": self.name, "value": self.value}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ResultRow:
|
||||||
|
"""Represents a single row in a result table."""
|
||||||
|
columns: List[ResultColumn] = field(default_factory=list)
|
||||||
|
|
||||||
|
def add_column(self, name: str, value: Any) -> None:
|
||||||
|
"""Add a column to this row."""
|
||||||
|
str_value = str(value) if value is not None else ""
|
||||||
|
self.columns.append(ResultColumn(name, str_value))
|
||||||
|
|
||||||
|
def get_column(self, name: str) -> Optional[str]:
|
||||||
|
"""Get column value by name."""
|
||||||
|
for col in self.columns:
|
||||||
|
if col.name.lower() == name.lower():
|
||||||
|
return col.value
|
||||||
|
return None
|
||||||
|
|
||||||
|
def to_dict(self) -> List[Dict[str, str]]:
|
||||||
|
"""Convert to list of column dicts."""
|
||||||
|
return [col.to_dict() for col in self.columns]
|
||||||
|
|
||||||
|
def to_list(self) -> List[tuple[str, str]]:
|
||||||
|
"""Convert to list of (name, value) tuples."""
|
||||||
|
return [(col.name, col.value) for col in self.columns]
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
"""String representation of the row."""
|
||||||
|
return " | ".join(str(col) for col in self.columns)
|
||||||
679
pipeline.py
Normal file
@@ -0,0 +1,679 @@
|
|||||||
|
"""Pipeline execution context and state management for cmdlets.
|
||||||
|
|
||||||
|
This module provides functions for managing pipeline state, allowing cmdlets to
|
||||||
|
emit results and control printing behavior within a piped execution context.
|
||||||
|
|
||||||
|
Key Concepts:
|
||||||
|
- Pipeline stages are chained command invocations
|
||||||
|
- Each stage receives input items and emits output items
|
||||||
|
- Printing behavior is controlled based on pipeline position
|
||||||
|
- Stage context tracks whether this is the last stage (affects output verbosity)
|
||||||
|
|
||||||
|
PowerShell-like piping model:
|
||||||
|
- Each stage processes items individually
|
||||||
|
- Stage calls emit() for each output item
|
||||||
|
- Output items become input for next stage
|
||||||
|
- Batch commands receive all items at once (special case)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from typing import Any, Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from models import PipelineStageContext
|
||||||
|
from helper.logger import log
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PIPELINE GLOBALS (maintained for backward compatibility)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Current pipeline context (thread-local in real world, global here for simplicity)
|
||||||
|
_CURRENT_CONTEXT: Optional[PipelineStageContext] = None
|
||||||
|
|
||||||
|
# Active execution state
|
||||||
|
_PIPE_EMITS: List[Any] = []
|
||||||
|
_PIPE_ACTIVE: bool = False
|
||||||
|
_PIPE_IS_LAST: bool = False
|
||||||
|
|
||||||
|
# Ephemeral handoff for direct pipelines (e.g., URL --screen-shot | ...)
|
||||||
|
_LAST_PIPELINE_CAPTURE: Optional[Any] = None
|
||||||
|
|
||||||
|
# Remember last search query to support refreshing results after pipeline actions
|
||||||
|
_LAST_SEARCH_QUERY: Optional[str] = None
|
||||||
|
|
||||||
|
# Track whether the last pipeline execution already refreshed and displayed results
|
||||||
|
_PIPELINE_REFRESHED: bool = False
|
||||||
|
|
||||||
|
# Cache the last pipeline outputs so non-interactive callers can inspect results
|
||||||
|
_PIPELINE_LAST_ITEMS: List[Any] = []
|
||||||
|
|
||||||
|
# Store the last result table for @ selection syntax (e.g., @2, @2-5, @{1,3,5})
|
||||||
|
_LAST_RESULT_TABLE: Optional[Any] = None
|
||||||
|
_LAST_RESULT_ITEMS: List[Any] = []
|
||||||
|
|
||||||
|
# History of result tables for @.. navigation (LIFO stack, max 20 tables)
|
||||||
|
_RESULT_TABLE_HISTORY: List[tuple[Optional[Any], List[Any]]] = []
|
||||||
|
_MAX_RESULT_TABLE_HISTORY = 20
|
||||||
|
|
||||||
|
# Current stage table for @N expansion (separate from history)
|
||||||
|
# Used to track the ResultTable with source_command + row_selection_args from current pipeline stage
|
||||||
|
# This is set by cmdlets that display tabular results (e.g., download-data showing formats)
|
||||||
|
# and used by CLI to expand @N into full commands like "download-data URL -item 2"
|
||||||
|
_CURRENT_STAGE_TABLE: Optional[Any] = None
|
||||||
|
|
||||||
|
# Items displayed by non-selectable commands (get-tag, delete-tag, etc.)
|
||||||
|
# These are available for @N selection but NOT saved to history
|
||||||
|
_DISPLAY_ITEMS: List[Any] = []
|
||||||
|
|
||||||
|
# Table for display-only commands (overlay)
|
||||||
|
# Used when a command wants to show a specific table formatting but not affect history
|
||||||
|
_DISPLAY_TABLE: Optional[Any] = None
|
||||||
|
|
||||||
|
# Track the indices the user selected via @ syntax for the current invocation
|
||||||
|
_PIPELINE_LAST_SELECTION: List[int] = []
|
||||||
|
|
||||||
|
# Track the currently executing command/pipeline string for worker attribution
|
||||||
|
_PIPELINE_COMMAND_TEXT: str = ""
|
||||||
|
|
||||||
|
# Shared scratchpad for cmdlets/funacts to stash structured data between stages
|
||||||
|
_PIPELINE_VALUES: Dict[str, Any] = {}
|
||||||
|
_PIPELINE_MISSING = object()
|
||||||
|
|
||||||
|
# Global callback to notify UI when library content changes
|
||||||
|
_UI_LIBRARY_REFRESH_CALLBACK: Optional[Any] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# PUBLIC API
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def set_stage_context(context: Optional[PipelineStageContext]) -> None:
|
||||||
|
"""Internal: Set the current pipeline stage context."""
|
||||||
|
global _CURRENT_CONTEXT
|
||||||
|
_CURRENT_CONTEXT = context
|
||||||
|
|
||||||
|
|
||||||
|
def get_stage_context() -> Optional[PipelineStageContext]:
|
||||||
|
"""Get the current pipeline stage context."""
|
||||||
|
return _CURRENT_CONTEXT
|
||||||
|
|
||||||
|
|
||||||
|
def emit(obj: Any) -> None:
|
||||||
|
"""Emit an object to the current pipeline stage output.
|
||||||
|
|
||||||
|
Call this from a cmdlet to pass data to the next pipeline stage.
|
||||||
|
If not in a pipeline context, this is a no-op.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj: Any object to emit downstream
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```python
|
||||||
|
def _run(item, args, config):
|
||||||
|
result = process(item)
|
||||||
|
if result:
|
||||||
|
emit(result) # Pass to next stage
|
||||||
|
return 0
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
# Try new context-based approach first
|
||||||
|
if _CURRENT_CONTEXT is not None:
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.debug(f"[EMIT] Context-based: appending to _CURRENT_CONTEXT.emits. obj={obj}")
|
||||||
|
_CURRENT_CONTEXT.emit(obj)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Fallback to legacy global approach (for backward compatibility)
|
||||||
|
try:
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.debug(f"[EMIT] Legacy: appending to _PIPE_EMITS. obj type={type(obj).__name__}, _PIPE_EMITS len before={len(_PIPE_EMITS)}")
|
||||||
|
_PIPE_EMITS.append(obj)
|
||||||
|
logger.debug(f"[EMIT] Legacy: _PIPE_EMITS len after={len(_PIPE_EMITS)}")
|
||||||
|
except Exception as e:
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.error(f"[EMIT] Error appending to _PIPE_EMITS: {e}", exc_info=True)
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
|
||||||
|
"""Print only if this is not a quiet mid-pipeline stage.
|
||||||
|
|
||||||
|
- Always allow errors printed to stderr by callers (they pass file=sys.stderr).
|
||||||
|
- For normal info messages, this suppresses printing for intermediate pipeline stages.
|
||||||
|
- Use this instead of log() in cmdlets when you want stage-aware output.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
*args: Arguments to print (same as built-in print)
|
||||||
|
file: Output stream (default: stdout)
|
||||||
|
**kwargs: Keyword arguments for print
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```python
|
||||||
|
# Always shows errors
|
||||||
|
print_if_visible("[error] Something failed", file=sys.stderr)
|
||||||
|
|
||||||
|
# Only shows in non-piped context or as final stage
|
||||||
|
print_if_visible(f"Processed {count} items")
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Print if: not in a pipeline OR this is the last stage
|
||||||
|
should_print = (not _PIPE_ACTIVE) or _PIPE_IS_LAST
|
||||||
|
|
||||||
|
# Always print when an explicit stream is supplied (callers pass file=sys.stderr for errors)
|
||||||
|
if file is not None:
|
||||||
|
should_print = True
|
||||||
|
|
||||||
|
if should_print:
|
||||||
|
log(*args, **kwargs) if file is None else log(*args, file=file, **kwargs)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def store_value(key: str, value: Any) -> None:
|
||||||
|
"""Store a value to pass to later pipeline stages.
|
||||||
|
|
||||||
|
Values are stored in a shared dictionary keyed by normalized lowercase strings.
|
||||||
|
This allows one stage to prepare data for the next stage without intermediate output.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key: Variable name (normalized to lowercase, non-empty)
|
||||||
|
value: Any Python object to store
|
||||||
|
"""
|
||||||
|
if not isinstance(key, str):
|
||||||
|
return
|
||||||
|
text = key.strip().lower()
|
||||||
|
if not text:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
_PIPELINE_VALUES[text] = value
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def load_value(key: str, default: Any = None) -> Any:
|
||||||
|
"""Retrieve a value stored by an earlier pipeline stage.
|
||||||
|
|
||||||
|
Supports dotted path notation for nested access (e.g., "metadata.tags" or "items.0").
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key: Variable name or dotted path (e.g., "my_var", "metadata.title", "list.0")
|
||||||
|
default: Value to return if key not found or access fails
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The stored value, or default if not found
|
||||||
|
"""
|
||||||
|
if not isinstance(key, str):
|
||||||
|
return default
|
||||||
|
text = key.strip()
|
||||||
|
if not text:
|
||||||
|
return default
|
||||||
|
parts = [segment.strip() for segment in text.split('.') if segment.strip()]
|
||||||
|
if not parts:
|
||||||
|
return default
|
||||||
|
root_key = parts[0].lower()
|
||||||
|
container = _PIPELINE_VALUES.get(root_key, _PIPELINE_MISSING)
|
||||||
|
if container is _PIPELINE_MISSING:
|
||||||
|
return default
|
||||||
|
if len(parts) == 1:
|
||||||
|
return container
|
||||||
|
current: Any = container
|
||||||
|
for fragment in parts[1:]:
|
||||||
|
if isinstance(current, dict):
|
||||||
|
fragment_lower = fragment.lower()
|
||||||
|
if fragment in current:
|
||||||
|
current = current[fragment]
|
||||||
|
continue
|
||||||
|
match = _PIPELINE_MISSING
|
||||||
|
for key_name, value in current.items():
|
||||||
|
if isinstance(key_name, str) and key_name.lower() == fragment_lower:
|
||||||
|
match = value
|
||||||
|
break
|
||||||
|
if match is _PIPELINE_MISSING:
|
||||||
|
return default
|
||||||
|
current = match
|
||||||
|
continue
|
||||||
|
if isinstance(current, (list, tuple)):
|
||||||
|
if fragment.isdigit():
|
||||||
|
try:
|
||||||
|
idx = int(fragment)
|
||||||
|
except ValueError:
|
||||||
|
return default
|
||||||
|
if 0 <= idx < len(current):
|
||||||
|
current = current[idx]
|
||||||
|
continue
|
||||||
|
return default
|
||||||
|
if hasattr(current, fragment):
|
||||||
|
try:
|
||||||
|
current = getattr(current, fragment)
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
return default
|
||||||
|
return default
|
||||||
|
return current
|
||||||
|
|
||||||
|
|
||||||
|
def reset() -> None:
|
||||||
|
"""Reset all pipeline state. Called between pipeline executions."""
|
||||||
|
global _PIPE_EMITS, _PIPE_ACTIVE, _PIPE_IS_LAST, _PIPELINE_VALUES
|
||||||
|
global _LAST_PIPELINE_CAPTURE, _PIPELINE_REFRESHED, _PIPELINE_LAST_ITEMS
|
||||||
|
global _PIPELINE_COMMAND_TEXT
|
||||||
|
|
||||||
|
_PIPE_EMITS = []
|
||||||
|
_PIPE_ACTIVE = False
|
||||||
|
_PIPE_IS_LAST = False
|
||||||
|
_LAST_PIPELINE_CAPTURE = None
|
||||||
|
_PIPELINE_REFRESHED = False
|
||||||
|
_PIPELINE_LAST_ITEMS = []
|
||||||
|
_PIPELINE_VALUES = {}
|
||||||
|
_PIPELINE_COMMAND_TEXT = ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_emitted_items() -> List[Any]:
|
||||||
|
"""Get a copy of all items emitted by the current pipeline stage."""
|
||||||
|
return list(_PIPE_EMITS)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_emits() -> None:
|
||||||
|
"""Clear the emitted items list (called between stages)."""
|
||||||
|
global _PIPE_EMITS
|
||||||
|
_PIPE_EMITS = []
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_selection(indices: Sequence[int]) -> None:
|
||||||
|
"""Record the indices selected via @ syntax for the next cmdlet.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
indices: Iterable of 0-based indices captured from the REPL parser
|
||||||
|
"""
|
||||||
|
global _PIPELINE_LAST_SELECTION
|
||||||
|
_PIPELINE_LAST_SELECTION = list(indices or [])
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_selection() -> List[int]:
|
||||||
|
"""Return the indices selected via @ syntax for the current invocation."""
|
||||||
|
return list(_PIPELINE_LAST_SELECTION)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_last_selection() -> None:
|
||||||
|
"""Clear the cached selection indices after a cmdlet finishes."""
|
||||||
|
global _PIPELINE_LAST_SELECTION
|
||||||
|
_PIPELINE_LAST_SELECTION = []
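A sketch of the intended lifecycle for these three helpers (import name hypothetical): the REPL parser records the 0-based indices parsed from an @ expression, the next cmdlet reads them, and the dispatcher clears them once that cmdlet finishes.

    import pipeline_context as ctx  # hypothetical module name

    # REPL parser: "@2-4" selects rows 2..4 (1-based), i.e. indices 1..3 (0-based).
    ctx.set_last_selection([1, 2, 3])

    # Next cmdlet: act on the selected rows.
    for index in ctx.get_last_selection():
        print(f"operating on row {index}")

    # Dispatcher: drop the cached selection after the cmdlet returns.
    ctx.clear_last_selection()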
|
||||||
|
|
||||||
|
|
||||||
|
def set_current_command_text(command_text: Optional[str]) -> None:
|
||||||
|
"""Record the raw pipeline/command text for downstream consumers."""
|
||||||
|
global _PIPELINE_COMMAND_TEXT
|
||||||
|
_PIPELINE_COMMAND_TEXT = (command_text or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_command_text(default: str = "") -> str:
|
||||||
|
"""Return the last recorded command/pipeline text."""
|
||||||
|
text = _PIPELINE_COMMAND_TEXT.strip()
|
||||||
|
return text if text else default
|
||||||
|
|
||||||
|
|
||||||
|
def clear_current_command_text() -> None:
|
||||||
|
"""Clear the cached command text after execution completes."""
|
||||||
|
global _PIPELINE_COMMAND_TEXT
|
||||||
|
_PIPELINE_COMMAND_TEXT = ""
|
||||||
|
|
||||||
|
|
||||||
|
def set_active(active: bool) -> None:
|
||||||
|
"""Internal: Set whether we're in a pipeline context."""
|
||||||
|
global _PIPE_ACTIVE
|
||||||
|
_PIPE_ACTIVE = active
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_stage(is_last: bool) -> None:
|
||||||
|
"""Internal: Set whether this is the last stage of the pipeline."""
|
||||||
|
global _PIPE_IS_LAST
|
||||||
|
_PIPE_IS_LAST = is_last
|
||||||
|
|
||||||
|
|
||||||
|
def set_search_query(query: Optional[str]) -> None:
|
||||||
|
"""Internal: Set the last search query for refresh purposes."""
|
||||||
|
global _LAST_SEARCH_QUERY
|
||||||
|
_LAST_SEARCH_QUERY = query
|
||||||
|
|
||||||
|
|
||||||
|
def get_search_query() -> Optional[str]:
|
||||||
|
"""Get the last search query."""
|
||||||
|
return _LAST_SEARCH_QUERY
|
||||||
|
|
||||||
|
|
||||||
|
def set_pipeline_refreshed(refreshed: bool) -> None:
|
||||||
|
"""Internal: Track whether the pipeline already refreshed results."""
|
||||||
|
global _PIPELINE_REFRESHED
|
||||||
|
_PIPELINE_REFRESHED = refreshed
|
||||||
|
|
||||||
|
|
||||||
|
def was_pipeline_refreshed() -> bool:
|
||||||
|
"""Check if the pipeline already refreshed results."""
|
||||||
|
return _PIPELINE_REFRESHED
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_items(items: list) -> None:
|
||||||
|
"""Internal: Cache the last pipeline outputs."""
|
||||||
|
global _PIPELINE_LAST_ITEMS
|
||||||
|
_PIPELINE_LAST_ITEMS = list(items) if items else []
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_items() -> List[Any]:
|
||||||
|
"""Get the last pipeline outputs."""
|
||||||
|
return list(_PIPELINE_LAST_ITEMS)
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_capture(obj: Any) -> None:
|
||||||
|
"""Internal: Store ephemeral handoff for direct pipelines."""
|
||||||
|
global _LAST_PIPELINE_CAPTURE
|
||||||
|
_LAST_PIPELINE_CAPTURE = obj
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_capture() -> Optional[Any]:
|
||||||
|
"""Get ephemeral pipeline handoff (e.g., URL --screen-shot | ...)."""
|
||||||
|
return _LAST_PIPELINE_CAPTURE
|
||||||
|
|
||||||
|
|
||||||
|
def set_ui_library_refresh_callback(callback: Any) -> None:
|
||||||
|
"""Set a callback to be called when library content is updated.
|
||||||
|
|
||||||
|
The callback will be called with:
|
||||||
|
callback(library_filter: str = 'local')
|
||||||
|
|
||||||
|
Args:
|
||||||
|
callback: A callable that accepts optional library_filter parameter
|
||||||
|
|
||||||
|
Example:
|
||||||
|
def my_refresh_callback(library_filter='local'):
|
||||||
|
print(f"Refresh library: {library_filter}")
|
||||||
|
set_ui_library_refresh_callback(my_refresh_callback)
|
||||||
|
"""
|
||||||
|
global _UI_LIBRARY_REFRESH_CALLBACK
|
||||||
|
_UI_LIBRARY_REFRESH_CALLBACK = callback
|
||||||
|
|
||||||
|
|
||||||
|
def get_ui_library_refresh_callback() -> Optional[Any]:
|
||||||
|
"""Get the current library refresh callback."""
|
||||||
|
return _UI_LIBRARY_REFRESH_CALLBACK
|
||||||
|
|
||||||
|
|
||||||
|
def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
|
||||||
|
"""Trigger a library refresh in the UI if callback is registered.
|
||||||
|
|
||||||
|
This should be called from cmdlets/funacts after content is added to the library.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
library_filter: Which library to refresh ('local', 'hydrus', etc.)
|
||||||
|
"""
|
||||||
|
callback = get_ui_library_refresh_callback()
|
||||||
|
if callback:
|
||||||
|
try:
|
||||||
|
callback(library_filter)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[trigger_ui_library_refresh] Error calling refresh callback: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
|
||||||
|
"""Store the last result table and items for @ selection syntax.
|
||||||
|
|
||||||
|
This should be called after displaying a result table, so users can reference
|
||||||
|
rows with @2, @2-5, @{1,3,5} syntax in subsequent commands.
|
||||||
|
Also maintains a history stack for @.. navigation (restore previous result table).
|
||||||
|
|
||||||
|
Only selectable commands (search-file, download-data) should call this to create history.
|
||||||
|
For action commands (delete-tag, add-tag, etc.), use set_last_result_table_preserve_history() instead.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result_table: The ResultTable object that was displayed (or None)
|
||||||
|
items: List of items that populated the table (optional)
|
||||||
|
"""
|
||||||
|
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _RESULT_TABLE_HISTORY, _DISPLAY_ITEMS, _DISPLAY_TABLE
|
||||||
|
|
||||||
|
# Push current table to history before replacing
|
||||||
|
if _LAST_RESULT_TABLE is not None:
|
||||||
|
_RESULT_TABLE_HISTORY.append((_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS.copy()))
|
||||||
|
# Keep history size limited
|
||||||
|
if len(_RESULT_TABLE_HISTORY) > _MAX_RESULT_TABLE_HISTORY:
|
||||||
|
_RESULT_TABLE_HISTORY.pop(0)
|
||||||
|
|
||||||
|
# Set new current table and clear any display items/table
|
||||||
|
_DISPLAY_ITEMS = []
|
||||||
|
_DISPLAY_TABLE = None
|
||||||
|
_LAST_RESULT_TABLE = result_table
|
||||||
|
_LAST_RESULT_ITEMS = items or []
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
|
||||||
|
"""Set a result table as an overlay (display only, no history).
|
||||||
|
|
||||||
|
Used for commands like get-tag that want to show a formatted table but
|
||||||
|
should be treated as a transient view (closing it returns to previous table).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result_table: The ResultTable object to display
|
||||||
|
items: List of items for @N selection
|
||||||
|
"""
|
||||||
|
global _DISPLAY_ITEMS, _DISPLAY_TABLE
|
||||||
|
|
||||||
|
_DISPLAY_TABLE = result_table
|
||||||
|
_DISPLAY_ITEMS = items or []
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_result_table_preserve_history(result_table: Optional[Any], items: Optional[List[Any]] = None) -> None:
|
||||||
|
"""Update the last result table WITHOUT adding to history.
|
||||||
|
|
||||||
|
Used for action commands (delete-tag, add-tag, etc.) that modify data but shouldn't
|
||||||
|
create history entries. This allows @.. to navigate search results, not undo stacks.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result_table: The ResultTable object that was displayed (or None)
|
||||||
|
items: List of items that populated the table (optional)
|
||||||
|
"""
|
||||||
|
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS
|
||||||
|
|
||||||
|
# Update current table WITHOUT pushing to history
|
||||||
|
_LAST_RESULT_TABLE = result_table
|
||||||
|
_LAST_RESULT_ITEMS = items or []
|
||||||
|
|
||||||
|
|
||||||
|
def set_last_result_items_only(items: Optional[List[Any]]) -> None:
|
||||||
|
"""Store items for @N selection WITHOUT affecting history or saved search data.
|
||||||
|
|
||||||
|
Used for display-only commands (get-tag, get-url, etc.) and action commands
|
||||||
|
(delete-tag, add-tag, etc.) that emit results but shouldn't affect history.
|
||||||
|
|
||||||
|
These items are available for @1, @2, etc. selection in the next command,
|
||||||
|
but are NOT saved to history. This preserves search context for @.. navigation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
items: List of items to select from
|
||||||
|
"""
|
||||||
|
global _DISPLAY_ITEMS, _DISPLAY_TABLE
|
||||||
|
|
||||||
|
# Store items for immediate @N selection, but DON'T modify _LAST_RESULT_ITEMS
|
||||||
|
# This ensures history contains original search data, not display transformations
|
||||||
|
_DISPLAY_ITEMS = items or []
|
||||||
|
# Clear display table since we're setting items only (CLI will generate table if needed)
|
||||||
|
_DISPLAY_TABLE = None
|
||||||
|
|
||||||
|
|
||||||
|
def restore_previous_result_table() -> bool:
|
||||||
|
"""Restore the previous result table from history (for @.. navigation).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if a previous table was restored, False if history is empty
|
||||||
|
"""
|
||||||
|
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _RESULT_TABLE_HISTORY, _DISPLAY_ITEMS, _DISPLAY_TABLE
|
||||||
|
|
||||||
|
# If we have an active overlay (display items/table), clear it to "go back" to the underlying table
|
||||||
|
if _DISPLAY_ITEMS or _DISPLAY_TABLE:
|
||||||
|
_DISPLAY_ITEMS = []
|
||||||
|
_DISPLAY_TABLE = None
|
||||||
|
return True
|
||||||
|
|
||||||
|
if not _RESULT_TABLE_HISTORY:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Pop from history and restore
|
||||||
|
_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS = _RESULT_TABLE_HISTORY.pop()
|
||||||
|
# Clear display items so get_last_result_items() falls back to restored items
|
||||||
|
_DISPLAY_ITEMS = []
|
||||||
|
_DISPLAY_TABLE = None
|
||||||
|
return True
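A sketch of how the history stack and the overlay interact (import name hypothetical; object() instances stand in for real ResultTable objects): selectable commands push history, overlays are transient, and @.. walks back.

    import pipeline_context as ctx  # hypothetical module name

    search_table = object()   # placeholder for a ResultTable built by search-file
    overlay_table = object()  # placeholder for a transient table built by get-tag

    # search-file (selectable) records its table; any previous table is pushed to history.
    ctx.set_last_result_table(search_table, ["item-1", "item-2"])

    # get-tag shows a display-only overlay; history is untouched.
    ctx.set_last_result_table_overlay(overlay_table, ["tag-a", "tag-b"])

    # "@..": the first call only clears the overlay and returns to search_table ...
    ctx.restore_previous_result_table()   # True
    # ... the next call pops real history; False here because nothing was pushed before search_table.
    ctx.restore_previous_result_table()   # False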
|
||||||
|
|
||||||
|
|
||||||
|
def get_display_table() -> Optional[Any]:
|
||||||
|
"""Get the current display overlay table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The ResultTable object, or None if no overlay table is set
|
||||||
|
"""
|
||||||
|
return _DISPLAY_TABLE
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_result_table() -> Optional[Any]:
|
||||||
|
"""Get the current last result table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The ResultTable object, or None if no table is set
|
||||||
|
"""
|
||||||
|
return _LAST_RESULT_TABLE
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_result_items() -> List[Any]:
|
||||||
|
"""Get the items available for @N selection.
|
||||||
|
|
||||||
|
Returns items from display/action commands (get-tag, delete-tag, etc.) if available,
|
||||||
|
otherwise returns items from the last search command. This ensures @N selection
|
||||||
|
works for both display operations and search results.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of items, or empty list if no prior results
|
||||||
|
"""
|
||||||
|
# Prioritize items from display commands (get-tag, delete-tag, etc.)
|
||||||
|
# These are available for immediate @N selection
|
||||||
|
if _DISPLAY_ITEMS:
|
||||||
|
return _DISPLAY_ITEMS
|
||||||
|
# Fall back to items from last search/selectable command
|
||||||
|
return _LAST_RESULT_ITEMS
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_result_table_source_command() -> Optional[str]:
|
||||||
|
"""Get the source command from the last displayed result table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Command name (e.g., 'download-data') or None if not set
|
||||||
|
"""
|
||||||
|
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_command'):
|
||||||
|
return _LAST_RESULT_TABLE.source_command
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_result_table_source_args() -> List[str]:
|
||||||
|
"""Get the base source arguments from the last displayed result table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of arguments (e.g., ['https://example.com']) or empty list
|
||||||
|
"""
|
||||||
|
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_args'):
|
||||||
|
return _LAST_RESULT_TABLE.source_args or []
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[str]]:
|
||||||
|
"""Get the selection arguments for a specific row in the last result table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
row_index: Index of the row (0-based)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Selection arguments (e.g., ['-item', '3']) or None
|
||||||
|
"""
|
||||||
|
if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'rows'):
|
||||||
|
if 0 <= row_index < len(_LAST_RESULT_TABLE.rows):
|
||||||
|
row = _LAST_RESULT_TABLE.rows[row_index]
|
||||||
|
if hasattr(row, 'selection_args'):
|
||||||
|
return row.selection_args
|
||||||
|
return None
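A sketch of how these three accessors could be combined to expand an @N reference back into a runnable command line (the import name and helper function are hypothetical; the attribute names match the hasattr checks above):

    from typing import List, Optional

    import pipeline_context as ctx  # hypothetical module name

    def expand_row_reference(row_number: int) -> Optional[List[str]]:
        """Turn a 1-based @N reference into argv for re-running the source command."""
        command = ctx.get_last_result_table_source_command()   # e.g. 'download-data'
        if not command:
            return None
        args = ctx.get_last_result_table_source_args()         # e.g. ['https://example.com']
        selection = ctx.get_last_result_table_row_selection_args(row_number - 1)  # e.g. ['-item', '3']
        return [command, *args, *(selection or [])]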
|
||||||
|
|
||||||
|
|
||||||
|
def set_current_stage_table(result_table: Optional[Any]) -> None:
|
||||||
|
"""Store the current pipeline stage table for @N expansion.
|
||||||
|
|
||||||
|
Used by cmdlets that display tabular results (e.g., download-data with formats)
|
||||||
|
to make their result table available for @N expansion logic.
|
||||||
|
|
||||||
|
Does NOT push to history - purely for command expansion in the current pipeline.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
result_table: The ResultTable object (or None to clear)
|
||||||
|
"""
|
||||||
|
global _CURRENT_STAGE_TABLE
|
||||||
|
_CURRENT_STAGE_TABLE = result_table
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_stage_table_source_command() -> Optional[str]:
|
||||||
|
"""Get the source command from the current pipeline stage table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Command name (e.g., 'download-data') or None
|
||||||
|
"""
|
||||||
|
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_command'):
|
||||||
|
return _CURRENT_STAGE_TABLE.source_command
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_stage_table_source_args() -> List[str]:
|
||||||
|
"""Get the source arguments from the current pipeline stage table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of arguments or empty list
|
||||||
|
"""
|
||||||
|
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_args'):
|
||||||
|
return _CURRENT_STAGE_TABLE.source_args or []
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[str]]:
|
||||||
|
"""Get the selection arguments for a row in the current pipeline stage table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
row_index: Index of the row (0-based)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Selection arguments or None
|
||||||
|
"""
|
||||||
|
if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'rows'):
|
||||||
|
if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows):
|
||||||
|
row = _CURRENT_STAGE_TABLE.rows[row_index]
|
||||||
|
if hasattr(row, 'selection_args'):
|
||||||
|
return row.selection_args
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def clear_last_result() -> None:
|
||||||
|
"""Clear the stored last result table and items."""
|
||||||
|
global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS
|
||||||
|
_LAST_RESULT_TABLE = None
|
||||||
|
_LAST_RESULT_ITEMS = []
|
||||||
|
|
||||||
|
|
||||||
|
def emit_list(objects: List[Any]) -> None:
|
||||||
|
"""Emit a list of PipeObjects to the next pipeline stage.
|
||||||
|
|
||||||
|
This allows cmdlets to emit multiple results that are tracked as a list,
|
||||||
|
enabling downstream cmdlets to process all of them or filter by metadata.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
objects: List of PipeObject instances or dicts to emit
|
||||||
|
"""
|
||||||
|
if _CURRENT_CONTEXT is not None:
|
||||||
|
_CURRENT_CONTEXT.emit(objects)
|
||||||
|
else:
|
||||||
|
_PIPE_EMITS.append(objects)
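As a sketch (import name hypothetical; plain dicts stand in for PipeObject instances), a cmdlet can emit a whole batch at once; outside an active stage context the batch is appended to the module-level emit buffer as a single entry:

    import pipeline_context as ctx  # hypothetical module name

    results = [
        {"path": "/tmp/a.mp4", "source": "download-data"},
        {"path": "/tmp/b.mp4", "source": "download-data"},
    ]
    ctx.emit_list(results)

    for batch in ctx.get_emitted_items():  # the list above is stored as one entry
        print(batch)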
|
||||||
183
pyproject.toml
Normal file
183
pyproject.toml
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=65.0", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "medeia-macina"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Comprehensive media management and search platform with support for local files, Hydrus database, torrents, books, and P2P networks"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.9,<3.13"
|
||||||
|
license = {text = "MIT"}
|
||||||
|
authors = [
|
||||||
|
{name = "Your Name", email = "your.email@example.com"}
|
||||||
|
]
|
||||||
|
keywords = ["media", "search", "management", "hydrus", "download", "cli", "tui"]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 3 - Alpha",
|
||||||
|
"Environment :: Console",
|
||||||
|
"Intended Audience :: End Users/Desktop",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Topic :: Multimedia",
|
||||||
|
"Topic :: Internet",
|
||||||
|
]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
# Core CLI and TUI frameworks
|
||||||
|
"typer>=0.9.0",
|
||||||
|
"prompt-toolkit>=3.0.0",
|
||||||
|
"textual>=0.30.0",
|
||||||
|
|
||||||
|
# Media processing and downloading
|
||||||
|
"yt-dlp>=2023.11.0",
|
||||||
|
"yt-dlp-ejs", # EJS challenge solver scripts for YouTube JavaScript challenges
|
||||||
|
"requests>=2.31.0",
|
||||||
|
"httpx>=0.25.0",
|
||||||
|
"ffmpeg-python>=0.2.0",
|
||||||
|
|
||||||
|
# Document and data handling
|
||||||
|
"PyPDF2>=3.0.0",
|
||||||
|
"img2pdf>=0.6.0",
|
||||||
|
"mutagen>=1.46.0",
|
||||||
|
"cbor2>=4.0",
|
||||||
|
|
||||||
|
# Image and media support
|
||||||
|
"Pillow>=10.0.0",
|
||||||
|
"python-bidi>=0.4.2",
|
||||||
|
|
||||||
|
# Metadata extraction and processing
|
||||||
|
"musicbrainzngs>=0.7.0",
|
||||||
|
"beautifulsoup4>=4.12.0",
|
||||||
|
"lxml>=4.9.0",
|
||||||
|
|
||||||
|
# Advanced searching and libraries
|
||||||
|
"libgen-api>=1.0.0",
|
||||||
|
"aioslsk>=1.6.0",
|
||||||
|
"imdbinfo>=0.1.10",
|
||||||
|
|
||||||
|
# Encryption and security
|
||||||
|
"pycryptodome>=3.18.0",
|
||||||
|
|
||||||
|
# Data processing
|
||||||
|
"bencode3",
|
||||||
|
"tqdm>=4.66.0",
|
||||||
|
|
||||||
|
# Browser automation
|
||||||
|
"playwright>=1.40.0",
|
||||||
|
|
||||||
|
# Development and utilities
|
||||||
|
"python-dateutil>=2.8.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
# Testing
|
||||||
|
"pytest>=7.4.0",
|
||||||
|
"pytest-cov>=4.1.0",
|
||||||
|
"pytest-asyncio>=0.21.0",
|
||||||
|
|
||||||
|
# Code quality
|
||||||
|
"black>=23.11.0",
|
||||||
|
"flake8>=6.1.0",
|
||||||
|
"isort>=5.12.0",
|
||||||
|
"mypy>=1.7.0",
|
||||||
|
"pylint>=3.0.0",
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
"sphinx>=7.2.0",
|
||||||
|
"sphinx-rtd-theme>=1.3.0",
|
||||||
|
|
||||||
|
# Debugging and profiling
|
||||||
|
"ipython>=8.17.0",
|
||||||
|
"ipdb>=0.13.0",
|
||||||
|
"memory-profiler>=0.61.0",
|
||||||
|
|
||||||
|
# Version control and CI/CD helpers
|
||||||
|
"pre-commit>=3.5.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
mm = "medeia_macina.cli_entry:main"
|
||||||
|
medeia = "medeia_macina.cli_entry:main"
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://github.com/yourusername/medeia-macina"
|
||||||
|
Documentation = "https://medeia-macina.readthedocs.io"
|
||||||
|
Repository = "https://github.com/yourusername/medeia-macina.git"
|
||||||
|
Issues = "https://github.com/yourusername/medeia-macina/issues"
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
packages = ["cmdlets", "helper", "TUI", "medeia_macina"]
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 100
|
||||||
|
target-version = ['py39', 'py310', 'py311', 'py312']
|
||||||
|
include = '\.pyi?$'
|
||||||
|
extend-exclude = '''
|
||||||
|
/(
|
||||||
|
# directories
|
||||||
|
\.eggs
|
||||||
|
| \.git
|
||||||
|
| \.hg
|
||||||
|
| \.mypy_cache
|
||||||
|
| \.tox
|
||||||
|
| \.venv
|
||||||
|
| _build
|
||||||
|
| buck-out
|
||||||
|
| build
|
||||||
|
| dist
|
||||||
|
| __pycache__
|
||||||
|
)/
|
||||||
|
'''
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length = 100
|
||||||
|
py_version = 39
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.9"
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
disallow_untyped_defs = false
|
||||||
|
disallow_incomplete_defs = false
|
||||||
|
check_untyped_defs = false
|
||||||
|
no_implicit_optional = true
|
||||||
|
warn_redundant_casts = true
|
||||||
|
warn_unused_ignores = true
|
||||||
|
warn_no_return = true
|
||||||
|
|
||||||
|
[tool.pylint.messages_control]
|
||||||
|
disable = [
|
||||||
|
"C0330", "C0326", # Bad whitespace
|
||||||
|
"R0913", # Too many arguments
|
||||||
|
"R0914", # Too many local variables
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
python_files = ["test_*.py", "*_test.py"]
|
||||||
|
addopts = "-v --cov=. --cov-report=html --cov-report=term-missing"
|
||||||
|
|
||||||
|
[tool.coverage.run]
|
||||||
|
branch = true
|
||||||
|
omit = [
|
||||||
|
"*/tests/*",
|
||||||
|
"*/__main__.py",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.coverage.report]
|
||||||
|
exclude_lines = [
|
||||||
|
"pragma: no cover",
|
||||||
|
"def __repr__",
|
||||||
|
"raise AssertionError",
|
||||||
|
"raise NotImplementedError",
|
||||||
|
"if __name__ == .__main__.:",
|
||||||
|
"if TYPE_CHECKING:",
|
||||||
|
]
|
||||||
29
requirements-dev.txt
Normal file
29
requirements-dev.txt
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# Development dependencies for Medeia-Macina
|
||||||
|
# Install with: pip install -r requirements-dev.txt
|
||||||
|
|
||||||
|
# Main requirements
|
||||||
|
-r requirements.txt
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
pytest>=7.4.0
|
||||||
|
pytest-cov>=4.1.0
|
||||||
|
pytest-asyncio>=0.21.0
|
||||||
|
|
||||||
|
# Code quality
|
||||||
|
black>=23.11.0
|
||||||
|
flake8>=6.1.0
|
||||||
|
isort>=5.12.0
|
||||||
|
mypy>=1.7.0
|
||||||
|
pylint>=3.0.0
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
sphinx>=7.2.0
|
||||||
|
sphinx-rtd-theme>=1.3.0
|
||||||
|
|
||||||
|
# Debugging and profiling
|
||||||
|
ipython>=8.17.0
|
||||||
|
ipdb>=0.13.0
|
||||||
|
memory-profiler>=0.61.0
|
||||||
|
|
||||||
|
# Version control and CI/CD helpers
|
||||||
|
pre-commit>=3.5.0
|
||||||
43
requirements.txt
Normal file
43
requirements.txt
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# Core CLI and TUI frameworks
|
||||||
|
typer>=0.9.0
|
||||||
|
prompt-toolkit>=3.0.0
|
||||||
|
textual>=0.30.0
|
||||||
|
|
||||||
|
# Media processing and downloading
|
||||||
|
yt-dlp>=2023.11.0
|
||||||
|
requests>=2.31.0
|
||||||
|
httpx>=0.25.0
|
||||||
|
ffmpeg-python>=0.2.0
|
||||||
|
|
||||||
|
# Document and data handling
|
||||||
|
PyPDF2>=3.0.0
|
||||||
|
img2pdf>=0.6.0
|
||||||
|
mutagen>=1.46.0
|
||||||
|
cbor2>=4.0
|
||||||
|
|
||||||
|
# Image and media support
|
||||||
|
Pillow>=10.0.0
|
||||||
|
python-bidi>=0.4.2
|
||||||
|
|
||||||
|
# Metadata extraction and processing
|
||||||
|
musicbrainzngs>=0.7.0
|
||||||
|
beautifulsoup4>=4.12.0
|
||||||
|
lxml>=4.9.0
|
||||||
|
|
||||||
|
# Advanced searching and libraries
|
||||||
|
libgen-api>=1.0.0
|
||||||
|
aioslsk>=1.6.0
|
||||||
|
imdbinfo>=0.1.10
|
||||||
|
|
||||||
|
# Encryption and security (if needed by Crypto usage)
|
||||||
|
pycryptodome>=3.18.0
|
||||||
|
|
||||||
|
# Data processing
|
||||||
|
bencode3
|
||||||
|
tqdm>=4.66.0
|
||||||
|
|
||||||
|
# Browser automation (for web scraping if needed)
|
||||||
|
playwright>=1.40.0
|
||||||
|
|
||||||
|
# Development and utilities
|
||||||
|
python-dateutil>=2.8.0
|
||||||
1228
result_table.py
Normal file
1228
result_table.py
Normal file
File diff suppressed because it is too large
39
setup.py
Normal file
39
setup.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""
|
||||||
|
Setup configuration for Medeia-Macina.
|
||||||
|
|
||||||
|
Medeia-Macina is a comprehensive media and data management system with support for:
|
||||||
|
- Video downloading from multiple sources (YouTube, etc.)
|
||||||
|
- Local and cloud-based file storage
|
||||||
|
- Advanced metadata and tag management
|
||||||
|
- Full-featured TUI and CLI interfaces
|
||||||
|
"""
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
with open("requirements.txt") as f:
|
||||||
|
requirements = [line.strip() for line in f if line.strip() and not line.startswith("#")]
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="medeia-macina",
|
||||||
|
version="1.0.0",
|
||||||
|
description="Comprehensive media and data management system",
|
||||||
|
author="Anonymous",
|
||||||
|
python_requires=">=3.9",
|
||||||
|
packages=find_packages(exclude=["tests", "*.tests"]),
|
||||||
|
install_requires=requirements,
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": [
|
||||||
|
"mm=medeia_macina.cli_entry:main",
|
||||||
|
"medeia=medeia_macina.cli_entry:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
classifiers=[
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
],
|
||||||
|
)
|
||||||