df
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-29 17:05:03 -08:00
parent 226de9316a
commit c019c00aed
104 changed files with 19669 additions and 12954 deletions
+12
View File
@@ -0,0 +1,12 @@
{
"editor.formatOnSave": true,
"editor.defaultFormatter": "ms-python.python",
"python.formatting.provider": "black",
"python.formatting.blackArgs": ["--line-length", "88"],
"[python]": {
"editor.defaultFormatter": "ms-python.python",
"editor.formatOnSave": true
}
}
+79 -81
View File
@@ -24,7 +24,7 @@ DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.
class HTTPClient:
"""Unified HTTP client with sync support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
@@ -35,7 +35,7 @@ class HTTPClient:
):
"""
Initialize HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
@@ -49,7 +49,7 @@ class HTTPClient:
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.Client] = None
def __enter__(self):
"""Context manager entry."""
self._client = httpx.Client(
@@ -58,19 +58,19 @@ class HTTPClient:
headers=self._get_headers(),
)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
# Close the httpx client created in __enter__ (if any) and reset the
# reference to None; _request/_request_stream raise RuntimeError when
# self._client is unset. Exception arguments are not inspected here.
if self._client:
self._client.close()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
# Seed with the configured User-Agent, then overlay base_headers; because
# update() runs second, a "User-Agent" key in base_headers wins.
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
# A fresh dict is returned on every call, so callers may mutate it freely.
return headers
def get(
self,
url: str,
@@ -80,13 +80,13 @@ class HTTPClient:
) -> httpx.Response:
"""
Make a GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
@@ -97,7 +97,7 @@ class HTTPClient:
headers=headers,
follow_redirects=allow_redirects,
)
def post(
self,
url: str,
@@ -108,14 +108,14 @@ class HTTPClient:
) -> httpx.Response:
"""
Make a POST request.
Args:
url: Request URL
data: Form data
json: JSON data
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
@@ -127,7 +127,7 @@ class HTTPClient:
files=files,
headers=headers,
)
def put(
self,
url: str,
@@ -139,7 +139,7 @@ class HTTPClient:
) -> httpx.Response:
"""
Make a PUT request.
Args:
url: Request URL
data: Form data
@@ -147,7 +147,7 @@ class HTTPClient:
content: Raw content
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
@@ -168,11 +168,11 @@ class HTTPClient:
) -> httpx.Response:
"""
Make a DELETE request.
Args:
url: Request URL
headers: Additional headers
Returns:
httpx.Response object
"""
@@ -182,25 +182,20 @@ class HTTPClient:
headers=headers,
)
def request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
def request(self, method: str, url: str, **kwargs) -> httpx.Response:
"""
Make a generic HTTP request.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments
Returns:
httpx.Response object
"""
return self._request(method, url, **kwargs)
def download(
self,
url: str,
@@ -211,20 +206,20 @@ class HTTPClient:
) -> Path:
"""
Download a file from URL with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
@@ -236,7 +231,7 @@ class HTTPClient:
progress_callback(0, total_bytes)
except Exception:
pass
with open(path, "wb") as f:
for chunk in response.iter_bytes(chunk_size):
if chunk:
@@ -251,31 +246,31 @@ class HTTPClient:
progress_callback(bytes_downloaded, total_bytes)
except Exception:
pass
return path
def _request(
self,
method: str,
url: str,
raise_for_status: bool = True,
log_http_errors: bool = True,
**kwargs
**kwargs,
) -> httpx.Response:
"""
Make an HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.Client.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
@@ -283,9 +278,9 @@ class HTTPClient:
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = self._client.request(method, url, **kwargs)
@@ -312,26 +307,30 @@ class HTTPClient:
response_text = e.response.text[:200]
except:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
logger.warning(
f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
)
if attempt < self.retries - 1:
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
if attempt < self.retries - 1:
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
@@ -339,13 +338,13 @@ class HTTPClient:
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
@@ -356,7 +355,7 @@ class AsyncHTTPClient:
):
"""
Initialize async HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
@@ -370,7 +369,7 @@ class AsyncHTTPClient:
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.AsyncClient] = None
async def __aenter__(self):
"""Async context manager entry."""
self._client = httpx.AsyncClient(
@@ -379,19 +378,19 @@ class AsyncHTTPClient:
headers=self._get_headers(),
)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
# Await aclose() on the httpx.AsyncClient created in __aenter__ (if any)
# and reset the reference to None; _request/_request_stream raise
# RuntimeError when self._client is unset. Exception arguments are not
# inspected here.
if self._client:
await self._client.aclose()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
# Seed with the configured User-Agent, then overlay base_headers; because
# update() runs second, a "User-Agent" key in base_headers wins.
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
# A fresh dict is returned on every call, so callers may mutate it freely.
return headers
async def get(
self,
url: str,
@@ -401,13 +400,13 @@ class AsyncHTTPClient:
) -> httpx.Response:
"""
Make an async GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
@@ -418,7 +417,7 @@ class AsyncHTTPClient:
headers=headers,
follow_redirects=allow_redirects,
)
async def post(
self,
url: str,
@@ -428,13 +427,13 @@ class AsyncHTTPClient:
) -> httpx.Response:
"""
Make an async POST request.
Args:
url: Request URL
data: Form data
json: JSON data
headers: Additional headers
Returns:
httpx.Response object
"""
@@ -445,7 +444,7 @@ class AsyncHTTPClient:
json=json,
headers=headers,
)
async def download(
self,
url: str,
@@ -456,25 +455,25 @@ class AsyncHTTPClient:
) -> Path:
"""
Download a file from URL asynchronously with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
async with self._request_stream("GET", url, headers=headers) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
async for chunk in response.aiter_bytes(chunk_size):
if chunk:
@@ -482,29 +481,24 @@ class AsyncHTTPClient:
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
async def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
"""
Make an async HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.AsyncClient.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
@@ -512,9 +506,9 @@ class AsyncHTTPClient:
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = await self._client.request(method, url, **kwargs)
@@ -540,28 +534,32 @@ class AsyncHTTPClient:
response_text = e.response.text[:200]
except:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
logger.warning(
f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
)
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
@@ -569,7 +567,7 @@ class AsyncHTTPClient:
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
@@ -590,7 +588,7 @@ def download(
url: str,
file_path: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
**kwargs
**kwargs,
) -> Path:
"""Quick file download without context manager."""
with HTTPClient() as client:
+360 -296
View File
File diff suppressed because it is too large Load Diff
+242 -227
View File
File diff suppressed because it is too large Load Diff
+797 -502
View File
File diff suppressed because it is too large Load Diff
+493 -143
View File
File diff suppressed because it is too large Load Diff
+92
View File
@@ -0,0 +1,92 @@
# get-url Command Enhancement Summary
## What Changed
Enhanced the `get-url` command in [cmdlet/get_url.py](cmdlet/get_url.py) to support searching for URLs across all stores with smart pattern matching.
## Key Features Added
### 1. URL Normalization (`_normalize_url_for_search`)
- Strips protocol prefixes: `https://`, `http://`, `ftp://`, etc.
- Removes `www.` prefix (case-insensitive)
- Converts to lowercase for case-insensitive matching
**Examples:**
- `https://www.youtube.com/watch?v=xx` → `youtube.com/watch?v=xx`
- `http://www.google.com` → `google.com`
- `FTP://cdn.example.com` → `cdn.example.com`
### 2. Wildcard Pattern Matching (`_match_url_pattern`)
- Supports `*` (matches any sequence) and `?` (matches single character)
- Case-insensitive matching
- Uses Python's `fnmatch` for robust pattern support
**Examples:**
- `youtube.com*` matches `youtube.com/watch`, `youtube.com/shorts`, etc.
- `*.example.com*` matches `cdn.example.com`, `api.example.com`, etc.
- `google.com/search*` matches `google.com/search?q=term`, etc.
### 3. Cross-Store URL Search (`_search_urls_across_stores`)
- Searches all configured stores (hydrus, folder, etc.)
- Finds matching URLs across all files in all stores
- Returns results grouped by store
- Emits `UrlItem` objects for pipelining
## Command Usage
### Search for URLs matching a pattern
```bash
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
```
### Original usage (unchanged)
```bash
@1 | get-url
# Requires hash and store from piped result
```
## Implementation Details
### New Methods
- `_normalize_url_for_search(url)` - Static method to normalize URLs
- `_match_url_pattern(url, pattern)` - Static method to match with wildcards
- `_search_urls_across_stores(pattern, config)` - Search across all stores
### Modified Method
- `run()` - Enhanced to support `-url` flag for searching, fallback to original behavior
### Return Values
- **Search mode**: List of `UrlItem` objects grouped by store, exit code 0 if found, 1 if no matches
- **Original mode**: URLs for specific file, exit code 0 if found, 1 if not found
## Testing
A test script is included: [test_get_url_search.py](test_get_url_search.py)
**All tests pass:**
- ✓ URL normalization (protocol/www stripping)
- ✓ Wildcard pattern matching
- ✓ Case-insensitive matching
- ✓ Complex patterns with subdomains and paths
## Files Modified
- [cmdlet/get_url.py](cmdlet/get_url.py) - Enhanced with URL search functionality
- [docs/GET_URL_SEARCH.md](docs/GET_URL_SEARCH.md) - User documentation
- [test_get_url_search.py](test_get_url_search.py) - Test suite
## Backward Compatibility
✓ Fully backward compatible - original usage unchanged:
- `@1 | get-url` still works as before
- `-query` flag still works for hash lookups
- `-store` flag still required for direct lookups
## Error Handling
- Returns exit code 1 if no matches found (search mode)
- Returns exit code 1 if no store configured
- Gracefully handles store backend errors
- Logs errors to stderr without crashing
+195
View File
@@ -0,0 +1,195 @@
✅ IMPLEMENTATION COMPLETE: get-url URL Search Enhancement
═══════════════════════════════════════════════════════════════════════════════
WHAT WAS IMPLEMENTED
────────────────────────────────────────────────────────────────────────────────
Enhanced the `get-url` command to search for URLs across all stores with:
1. PROTOCOL STRIPPING
- Removes: https://, http://, ftp://, and other scheme prefixes
- Removes: www. prefix (case-insensitive)
- Example: https://www.youtube.com/watch?v=abc → youtube.com/watch?v=abc
2. WILDCARD PATTERN MATCHING
- Asterisk (*): matches any sequence of characters
- Question mark (?): matches exactly one character
- Case-insensitive matching
- Example: youtube.com* matches all YouTube URLs
3. CROSS-STORE SEARCHING
- Searches all configured stores (Hydrus, Folder, etc.)
- Finds matching URLs for all files in all stores
- Returns results grouped by store
- Emits UrlItem objects for pipelining
═══════════════════════════════════════════════════════════════════════════════
COMMAND USAGE
────────────────────────────────────────────────────────────────────────────────
SEARCH MODE (NEW):
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
ORIGINAL MODE (UNCHANGED):
@1 | get-url
═══════════════════════════════════════════════════════════════════════════════
PRACTICAL EXAMPLES
────────────────────────────────────────────────────────────────────────────────
1. Find all YouTube video URLs:
$ get-url -url "youtube.com*"
Results show all files with YouTube URLs
2. Find specific video by URL:
$ get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
The pattern is normalized to youtube.com/watch?v=xx_88tdwmes before matching, and files whose URLs match are returned
3. Find by domain:
$ get-url -url "google.com*"
Matches: google.com, www.google.com/search, google.com/maps
4. Find by subdomain pattern:
$ get-url -url "*.example.com*"
Matches: cdn.example.com, api.example.com (www.example.com normalizes to example.com, so it needs the pattern *example.com* instead)
5. Find by path pattern:
$ get-url -url "youtube.com/watch*"
Matches: youtube.com/watch?v=123 (NOT youtube.com/shorts/abc)
═══════════════════════════════════════════════════════════════════════════════
FILES MODIFIED / CREATED
────────────────────────────────────────────────────────────────────────────────
MAIN IMPLEMENTATION:
✓ cmdlet/get_url.py
- Added: _normalize_url_for_search() method
- Added: _match_url_pattern() method
- Added: _search_urls_across_stores() method
- Modified: run() method to handle -url flag
- Lines: 281 total (was 127)
DOCUMENTATION:
✓ docs/GET_URL_SEARCH.md - Full feature documentation
✓ docs/GET_URL_QUICK_REF.md - Quick reference guide
✓ ENHANCEMENT_SUMMARY.md - Technical summary
TESTING:
✓ test_get_url_search.py - Comprehensive test suite
- URL normalization tests: 6/6 passed ✓
- Pattern matching tests: 9/9 passed ✓
═══════════════════════════════════════════════════════════════════════════════
IMPLEMENTATION DETAILS
────────────────────────────────────────────────────────────────────────────────
NEW METHODS (Static):
_normalize_url_for_search(url: str) -> str
Strips protocol and www prefix, returns lowercase
Examples:
"https://www.youtube.com/watch?v=xx" → "youtube.com/watch?v=xx"
"http://www.google.com" → "google.com"
"ftp://files.example.com" → "files.example.com"
_match_url_pattern(url: str, pattern: str) -> bool
Normalizes both URL and pattern, uses fnmatch for wildcard matching
Returns True if URL matches pattern, False otherwise
NEW METHODS (Instance):
_search_urls_across_stores(pattern: str, config: Dict) -> Tuple[List[UrlItem], List[str]]
Searches all stores for matching URLs
Returns: (matched_items, stores_searched)
MODIFIED METHOD:
run(result, args, config) -> int
Now handles:
1. If -url flag provided: Search mode
2. Otherwise: Original mode (hash+store lookup)
Maintains full backward compatibility
═══════════════════════════════════════════════════════════════════════════════
BACKWARD COMPATIBILITY
────────────────────────────────────────────────────────────────────────────────
✓ FULLY COMPATIBLE
- Original usage: @1 | get-url (unchanged)
- -query flag: Still works for hash lookups
- -store flag: Still required for direct lookups
- Return codes: Unchanged (0 = success, 1 = not found/error)
═══════════════════════════════════════════════════════════════════════════════
TEST RESULTS
────────────────────────────────────────────────────────────────────────────────
All 15 tests passed ✓
URL Normalization (6 tests):
✓ https://www.youtube.com/watch?v=xx_88TDWmEs
✓ http://www.google.com
✓ ftp://files.example.com/path
✓ HTTPS://WWW.EXAMPLE.COM
✓ www.example.com
✓ example.com
Pattern Matching (9 tests):
✓ youtube.com* matches youtube.com/watch
✓ youtube.com/watch* matches youtube.com/watch?v=123
✓ youtube.com/shorts* does NOT match watch?v=123
✓ google.com matches google.com
✓ google.com* matches google.com/search
✓ *.example.com* matches cdn.example.com
✓ *example.com* matches cdn.example.com
✓ example.com does NOT match example.org
✓ reddit.com* matches reddit.com/r/videos
═══════════════════════════════════════════════════════════════════════════════
NEXT STEPS (OPTIONAL)
────────────────────────────────────────────────────────────────────────────────
Future enhancements could include:
1. Performance optimization: Cache results from stores
2. Regex support: --regex flag for complex patterns
3. Limit flag: --limit N to cap results
4. Filter by store: --store NAME to search specific stores only
5. Exclude duplicates: --unique flag to deduplicate URLs
6. Export options: --json, --csv output formats
═══════════════════════════════════════════════════════════════════════════════
VERIFICATION
────────────────────────────────────────────────────────────────────────────────
✓ Python syntax: Valid (py_compile passed)
✓ Imports: All dependencies available
✓ Command registration: Successful
✓ Test suite: All 15 tests pass
✓ Backward compatibility: Fully maintained
✓ Error handling: Graceful with stderr logging
✓ Documentation: Complete with examples
═══════════════════════════════════════════════════════════════════════════════
READY FOR PRODUCTION ✓
The get-url command is now ready to use for URL searching across all stores
with intelligent pattern matching and normalization.
Usage:
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
═══════════════════════════════════════════════════════════════════════════════
+60 -17
View File
@@ -150,7 +150,9 @@ def _osd_overlay_set_ass(client: MPVIPCClient, ass_text: str) -> Optional[dict]:
def _osd_overlay_clear(client: MPVIPCClient) -> None:
client.send_command({"command": {"name": "osd-overlay", "id": _LYRIC_OSD_OVERLAY_ID, "format": "none"}})
client.send_command(
{"command": {"name": "osd-overlay", "id": _LYRIC_OSD_OVERLAY_ID, "format": "none"}}
)
def _log(msg: str) -> None:
@@ -181,6 +183,8 @@ def _ipc_get_property(
if resp and resp.get("error") == "success":
return resp.get("data", default)
return default
def _http_get_json(url: str, *, timeout_s: float = 10.0) -> Optional[dict]:
try:
req = Request(
@@ -262,7 +266,9 @@ def _wrap_plain_lyrics_as_lrc(text: str) -> str:
return "\n".join(out) + "\n"
def _fetch_lrclib(*, artist: Optional[str], title: Optional[str], duration_s: Optional[float] = None) -> Optional[str]:
def _fetch_lrclib(
*, artist: Optional[str], title: Optional[str], duration_s: Optional[float] = None
) -> Optional[str]:
base = "https://lrclib.net/api"
# Require both artist and title; title-only lookups cause frequent mismatches.
@@ -506,7 +512,9 @@ def _write_temp_sub_file(*, key: str, text: str) -> Path:
tmp_dir.mkdir(parents=True, exist_ok=True)
ext = _infer_sub_extension(text)
digest = hashlib.sha1((key + "\n" + (text or "")).encode("utf-8", errors="ignore")).hexdigest()[:16]
digest = hashlib.sha1((key + "\n" + (text or "")).encode("utf-8", errors="ignore")).hexdigest()[
:16
]
safe_key = hashlib.sha1((key or "").encode("utf-8", errors="ignore")).hexdigest()[:12]
path = (tmp_dir / f"sub-{safe_key}-{digest}{ext}").resolve()
path.write_text(text or "", encoding="utf-8", errors="replace")
@@ -747,7 +755,9 @@ def _infer_store_for_target(*, target: str, config: dict) -> Optional[str]:
continue
root = None
try:
root = getattr(backend, "_location", None) or getattr(backend, "location", lambda: None)()
root = (
getattr(backend, "_location", None) or getattr(backend, "location", lambda: None)()
)
except Exception:
root = None
if not root:
@@ -816,7 +826,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
while True:
try:
# Toggle support (mpv Lua script sets this property; default to visible).
visible_raw = _ipc_get_property(client, _LYRIC_VISIBLE_PROP, True, raise_on_disconnect=True)
visible_raw = _ipc_get_property(
client, _LYRIC_VISIBLE_PROP, True, raise_on_disconnect=True
)
raw_path = _ipc_get_property(client, "path", None, raise_on_disconnect=True)
except ConnectionError:
try:
@@ -915,7 +927,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
if is_http:
# HTTP/HTTPS targets are only valid if they map to a store backend.
store_from_url = _extract_store_from_url_target(target)
store_name = store_from_url or _infer_hydrus_store_from_url_target(target=target, config=cfg)
store_name = store_from_url or _infer_hydrus_store_from_url_target(
target=target, config=cfg
)
if not store_name:
_log("HTTP target has no store mapping; lyrics disabled")
current_store_name = None
@@ -962,7 +976,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
except Exception:
meta = None
if meta is None:
_log(f"HTTP target not found in store DB (store={store_name!r} hash={current_file_hash}); lyrics disabled")
_log(
f"HTTP target not found in store DB (store={store_name!r} hash={current_file_hash}); lyrics disabled"
)
current_store_name = None
current_backend = None
current_key = None
@@ -988,9 +1004,15 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
file_hash=current_file_hash,
config=cfg,
)
current_key = f"{current_store_name}:{current_file_hash}" if current_store_name and current_file_hash else None
current_key = (
f"{current_store_name}:{current_file_hash}"
if current_store_name and current_file_hash
else None
)
_log(f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid={bool(current_key)}")
_log(
f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid={bool(current_key)}"
)
if not current_key or not current_backend:
current_store_name = None
@@ -1010,7 +1032,13 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
# Load/reload lyrics when we have a resolvable key and it differs from what we loaded.
# This is important for the autofetch path: the note can appear without the mpv target changing.
if current_key and current_key != last_loaded_key and current_store_name and current_file_hash and current_backend:
if (
current_key
and current_key != last_loaded_key
and current_store_name
and current_file_hash
and current_backend
):
notes: Dict[str, str] = {}
try:
notes = current_backend.get_note(current_file_hash, config=cfg) or {}
@@ -1018,7 +1046,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
notes = {}
try:
_log(f"Loaded notes keys: {sorted([str(k) for k in notes.keys()]) if isinstance(notes, dict) else 'N/A'}")
_log(
f"Loaded notes keys: {sorted([str(k) for k in notes.keys()]) if isinstance(notes, dict) else 'N/A'}"
)
except Exception:
_log("Loaded notes keys: <error>")
@@ -1062,7 +1092,11 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
# Throttle attempts per key to avoid hammering APIs.
autofetch_enabled = bool(cfg.get("lyric_autofetch", True))
now = time.time()
if autofetch_enabled and current_key != last_fetch_attempt_key and (now - last_fetch_attempt_at) > 2.0:
if (
autofetch_enabled
and current_key != last_fetch_attempt_key
and (now - last_fetch_attempt_at) > 2.0
):
last_fetch_attempt_key = current_key
last_fetch_attempt_at = now
@@ -1082,7 +1116,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
except Exception:
pass
_log(f"Autofetch query artist={artist!r} title={title!r} duration={duration_s!r}")
_log(
f"Autofetch query artist={artist!r} title={title!r} duration={duration_s!r}"
)
if not artist or not title:
_log("Autofetch skipped: requires both artist and title")
@@ -1091,13 +1127,19 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
fetched = _fetch_lrclib(
artist=artist,
title=title,
duration_s=float(duration_s) if isinstance(duration_s, (int, float)) else None,
duration_s=(
float(duration_s) if isinstance(duration_s, (int, float)) else None
),
)
if not fetched or not fetched.strip():
fetched = _fetch_lyrics_ovh(artist=artist, title=title)
if fetched and fetched.strip():
try:
ok = bool(current_backend.set_note(current_file_hash, "lyric", fetched, config=cfg))
ok = bool(
current_backend.set_note(
current_file_hash, "lyric", fetched, config=cfg
)
)
_log(f"Autofetch stored lyric note ok={ok}")
# Next loop iteration will re-load the note.
except Exception as exc:
@@ -1187,7 +1229,9 @@ def run_overlay(*, mpv: MPV, entries: List[LrcLine], poll_s: float = 0.15) -> in
client = mpv.client()
if not client.connect():
print("mpv IPC is not reachable (is mpv running with --input-ipc-server?).", file=sys.stderr)
print(
"mpv IPC is not reachable (is mpv running with --input-ipc-server?).", file=sys.stderr
)
return 3
while True:
@@ -1240,7 +1284,6 @@ def run_overlay(*, mpv: MPV, entries: List[LrcLine], poll_s: float = 0.15) -> in
time.sleep(poll_s)
def main(argv: Optional[List[str]] = None) -> int:
parser = argparse.ArgumentParser(prog="python -m MPV.lyric", add_help=True)
parser.add_argument(
+90 -45
View File
@@ -133,9 +133,7 @@ def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]:
# Use CIM to query command lines; output as JSON for robust parsing.
# Note: `ConvertTo-Json` returns a number for single item, array for many, or null.
ps_script = (
"$ipc = "
+ json.dumps(ipc_path)
+ "; "
"$ipc = " + json.dumps(ipc_path) + "; "
"Get-CimInstance Win32_Process | "
"Where-Object { $_.CommandLine -and $_.CommandLine -match ' -m\\s+MPV\\.lyric(\\s|$)' -and $_.CommandLine -match ('--ipc\\s+' + [regex]::Escape($ipc)) } | "
"Select-Object -ExpandProperty ProcessId | ConvertTo-Json -Compress"
@@ -201,6 +199,7 @@ def _windows_kill_pids(pids: List[int]) -> None:
class MPVIPCError(Exception):
"""Raised when MPV IPC communication fails."""
# Plain Exception subclass with no extra state or behaviour; the docstring
# alone would be a sufficient class body, so the `pass` below is a no-op.
pass
@@ -248,7 +247,9 @@ class MPV:
finally:
client.disconnect()
def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]:
def send(
self, command: Dict[str, Any] | List[Any], silent: bool = False
) -> Optional[Dict[str, Any]]:
client = self.client(silent=bool(silent))
try:
if not client.connect():
@@ -308,7 +309,7 @@ class MPV:
pass
def _q(s: str) -> str:
return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"'
return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
pipeline = f"download-media -url {_q(url)} -format {_q(fmt)}"
if store:
@@ -329,10 +330,18 @@ class MPV:
"pipeline": pipeline,
}
except Exception as exc:
return {"success": False, "stdout": "", "stderr": "", "error": f"{type(exc).__name__}: {exc}", "pipeline": pipeline}
return {
"success": False,
"stdout": "",
"stderr": "",
"error": f"{type(exc).__name__}: {exc}",
"pipeline": pipeline,
}
def get_playlist(self, silent: bool = False) -> Optional[List[Dict[str, Any]]]:
resp = self.send({"command": ["get_property", "playlist"], "request_id": 100}, silent=silent)
resp = self.send(
{"command": ["get_property", "playlist"], "request_id": 100}, silent=silent
)
if resp is None:
return None
if resp.get("error") == "success":
@@ -467,7 +476,11 @@ class MPV:
env["PYTHONUNBUFFERED"] = "1"
try:
existing_pp = env.get("PYTHONPATH")
env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
env["PYTHONPATH"] = (
str(repo_root)
if not existing_pp
else (str(repo_root) + os.pathsep + str(existing_pp))
)
except Exception:
pass
kwargs["env"] = env
@@ -486,7 +499,13 @@ class MPV:
except Exception:
flags |= 0x08000000
kwargs["creationflags"] = flags
kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
_LYRIC_PROCESS = subprocess.Popen(cmd, **kwargs)
debug(f"Lyric loader started (log={log_path})")
@@ -608,10 +627,22 @@ class MPV:
flags |= 0x08000000
kwargs["creationflags"] = flags
# startupinfo is harmless for GUI apps; helps hide flashes for console-subsystem builds.
kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
debug("Starting MPV")
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
subprocess.Popen(
cmd,
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
**kwargs,
)
# Start the persistent pipeline helper eagerly so MPV Lua can issue
# non-blocking requests (e.g., format list prefetch) without needing
@@ -634,7 +665,11 @@ class MPV:
helper_env = os.environ.copy()
try:
existing_pp = helper_env.get("PYTHONPATH")
helper_env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
helper_env["PYTHONPATH"] = (
str(repo_root)
if not existing_pp
else (str(repo_root) + os.pathsep + str(existing_pp))
)
except Exception:
pass
@@ -650,7 +685,13 @@ class MPV:
except Exception:
flags |= 0x08000000
helper_kwargs["creationflags"] = flags
helper_kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
helper_kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
helper_kwargs["cwd"] = str(repo_root)
helper_kwargs["env"] = helper_env
@@ -668,10 +709,10 @@ class MPV:
def get_ipc_pipe_path() -> str:
"""Get the fixed IPC pipe/socket path for persistent MPV connection.
Uses a fixed name so all playback sessions connect to the same MPV
window/process instead of creating new instances.
Returns:
Path to IPC pipe (Windows) or socket (Linux/macOS)
"""
@@ -680,7 +721,7 @@ def get_ipc_pipe_path() -> str:
return str(override)
system = platform.system()
if system == "Windows":
return f"\\\\.\\pipe\\{FIXED_IPC_PIPE_NAME}"
elif system == "Darwin": # macOS
@@ -695,7 +736,7 @@ def _unwrap_memory_target(text: Optional[str]) -> Optional[str]:
return text
for line in text.splitlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('memory://'):
if not line or line.startswith("#") or line.startswith("memory://"):
continue
return line
return text
@@ -703,14 +744,16 @@ def _unwrap_memory_target(text: Optional[str]) -> Optional[str]:
class MPVIPCClient:
"""Client for communicating with mpv via IPC socket/pipe.
This is the unified interface for all Python code to communicate with mpv.
It handles platform-specific differences (Windows named pipes vs Unix sockets).
"""
def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False):
def __init__(
self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False
):
"""Initialize MPV IPC client.
Args:
socket_path: Path to IPC socket/pipe. If None, uses the fixed persistent path.
timeout: Socket timeout in seconds.
@@ -826,10 +869,10 @@ class MPVIPCClient:
except Exception:
pass
return None
def connect(self) -> bool:
"""Connect to mpv IPC socket.
Returns:
True if connection successful, False otherwise.
"""
@@ -838,7 +881,7 @@ class MPVIPCClient:
# Windows named pipes
try:
# Try to open the named pipe
self.sock = open(self.socket_path, 'r+b', buffering=0)
self.sock = open(self.socket_path, "r+b", buffering=0)
return True
except (OSError, IOError) as exc:
if not self.silent:
@@ -866,20 +909,20 @@ class MPVIPCClient:
debug(f"Failed to connect to MPV IPC: {exc}")
self.sock = None
return False
def send_command(self, command_data: Dict[str, Any] | List[Any]) -> Optional[Dict[str, Any]]:
"""Send a command to mpv and get response.
Args:
command_data: Command dict (e.g. {"command": [...]}) or list (e.g. ["loadfile", ...])
Returns:
Response dict with 'error' key (value 'success' on success), or None on error.
"""
if not self.sock:
if not self.connect():
return None
try:
# Format command as JSON (mpv IPC protocol)
request: Dict[str, Any]
@@ -887,20 +930,21 @@ class MPVIPCClient:
request = {"command": command_data}
else:
request = command_data
# Add request_id if not present to match response
if "request_id" not in request:
request["request_id"] = int(_time.time() * 1000) % 100000
payload = json.dumps(request) + "\n"
# Debug: log the command being sent
from SYS.logger import debug as _debug
_debug(f"[IPC] Sending: {payload.strip()}")
# Send command
self._write_payload(payload)
# Receive response
# We need to read lines until we find the one with matching request_id
# or until timeout/error. MPV might send events in between.
@@ -909,20 +953,22 @@ class MPVIPCClient:
response_data = self._readline(timeout=self.timeout)
if response_data is None:
return None
if not response_data:
break
try:
lines = response_data.decode('utf-8', errors='replace').strip().split('\n')
lines = response_data.decode("utf-8", errors="replace").strip().split("\n")
for line in lines:
if not line: continue
if not line:
continue
resp = json.loads(line)
# Debug: log responses
from SYS.logger import debug as _debug
_debug(f"[IPC] Received: {line}")
# Check if this is the response to our request
if resp.get("request_id") == request.get("request_id"):
return resp
@@ -940,13 +986,13 @@ class MPVIPCClient:
debug(f"[MPV error] {resp}")
except json.JSONDecodeError:
pass
return None
except Exception as exc:
debug(f"Error sending command to MPV: {exc}")
self.disconnect()
return None
def disconnect(self) -> None:
"""Disconnect from mpv IPC socket."""
if self.sock:
@@ -955,17 +1001,16 @@ class MPVIPCClient:
except Exception:
pass
self.sock = None
def __del__(self) -> None:
    """Best-effort cleanup when the client object is garbage-collected.

    Delegates to ``disconnect()``. Any exception is suppressed: a finalizer
    may run on a partially constructed object (for example when ``__init__``
    failed before ``self.sock`` was assigned) or during interpreter shutdown,
    and an error raised here cannot be handled by any caller.
    """
    try:
        self.disconnect()
    except Exception:
        # Nothing useful can be done with a failure inside a finalizer.
        pass
def __enter__(self):
"""Context manager entry: attempt to connect, then return this client."""
# The boolean result of connect() is discarded here.
self.connect()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit: disconnect from mpv.

Returns None, so an exception raised inside the ``with`` body propagates.
"""
self.disconnect()
+36 -33
View File
@@ -24,20 +24,19 @@ def setup_logging(log_file: Optional[Path] = None) -> logging.Logger:
"""Setup logging for MPV API calls."""
logger = logging.getLogger("mpv-lua-api")
logger.setLevel(logging.DEBUG)
if not logger.handlers:
if log_file:
handler = logging.FileHandler(str(log_file), encoding="utf-8")
else:
handler = logging.StreamHandler(sys.stderr)
formatter = logging.Formatter(
"[%(asctime)s][%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
"[%(asctime)s][%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
return logger
@@ -54,19 +53,19 @@ def execute_pipeline(
dry_run: bool = False,
) -> Dict[str, Any]:
"""Execute a pipeline command and return result as JSON.
Args:
pipeline_cmd: Pipeline command string (e.g. "trim-file -path ... | add-file -store ...")
log_file: Optional path to helper log file for logging
dry_run: If True, log but don't execute
Returns:
JSON object with keys: success, stdout, stderr, error, returncode
"""
try:
if log_file:
log_to_helper(f"[api] execute_pipeline cmd={pipeline_cmd}", log_file)
if dry_run:
return {
"success": True,
@@ -76,30 +75,30 @@ def execute_pipeline(
"returncode": 0,
"cmd": pipeline_cmd,
}
# Call the CLI directly as subprocess
import subprocess
import shlex
# Parse the pipeline command into separate arguments
cmd_args = shlex.split(pipeline_cmd)
result = subprocess.run(
[sys.executable, "-m", "CLI"] + cmd_args,
capture_output=True,
text=True,
cwd=str(_ROOT_DIR),
env={**dict(__import__('os').environ), "MEDEIA_MPV_CALLER": "lua"},
env={**dict(__import__("os").environ), "MEDEIA_MPV_CALLER": "lua"},
)
if log_file:
log_to_helper(
f"[api] result returncode={result.returncode} len_stdout={len(result.stdout or '')} len_stderr={len(result.stderr or '')}",
log_file
log_file,
)
if result.stderr:
log_to_helper(f"[api] stderr: {result.stderr[:500]}", log_file)
return {
"success": result.returncode == 0,
"stdout": result.stdout or "",
@@ -108,12 +107,12 @@ def execute_pipeline(
"returncode": result.returncode,
"cmd": pipeline_cmd,
}
except Exception as exc:
msg = f"{type(exc).__name__}: {exc}"
if log_file:
log_to_helper(f"[api] exception {msg}", log_file)
return {
"success": False,
"stdout": "",
@@ -126,48 +125,52 @@ def execute_pipeline(
def handle_api_request(request_json: str, log_file: Optional[Path] = None) -> str:
    """Handle an API request from Lua and return a JSON response.

    Request format:
        {
            "cmd": "execute_pipeline",
            "pipeline": "trim-file -path ... | add-file -store ...",
            ...
        }

    Args:
        request_json: The request encoded as a JSON object string.
        log_file: Optional helper log path, forwarded to ``execute_pipeline``.

    Returns:
        A JSON string. For ``execute_pipeline`` it is the serialized result of
        that call; for anything else (unknown command, malformed request, or
        any exception) it is ``{"success": false, "error": "..."}``.
        This function never raises.
    """
    try:
        request = json.loads(request_json)

        # Valid JSON that is not an object (e.g. a list or a number) has no
        # "cmd" field; report that explicitly instead of leaking an
        # AttributeError message to the caller.
        if not isinstance(request, dict):
            return json.dumps(
                {
                    "success": False,
                    "error": "Request must be a JSON object",
                }
            )

        cmd = request.get("cmd")

        if cmd == "execute_pipeline":
            pipeline_cmd = request.get("pipeline", "")
            result = execute_pipeline(pipeline_cmd, log_file)
            return json.dumps(result)

        return json.dumps(
            {
                "success": False,
                "error": f"Unknown command: {cmd}",
            }
        )

    except Exception as exc:
        # Always answer with JSON so the Lua side can parse the failure.
        return json.dumps(
            {
                "success": False,
                "error": f"{type(exc).__name__}: {exc}",
            }
        )
if __name__ == "__main__":
    # When called from Lua via subprocess:
    #   python mpv_lua_api.py <json-request> [log-file]
    # argv[1] is the JSON request; the optional argv[2] is a helper log path.
    if len(sys.argv) < 2:
        print(json.dumps({"success": False, "error": "No request provided"}))
        sys.exit(1)

    request_json = sys.argv[1]
    log_file = Path(sys.argv[2]) if len(sys.argv) > 2 else None

    # The JSON response is written to stdout for the caller to read.
    response = handle_api_request(request_json, log_file)
    print(response)
+60 -21
View File
@@ -17,6 +17,7 @@ Protocol (user-data properties):
This helper is intentionally minimal: one request at a time, last-write-wins.
"""
from __future__ import annotations
MEDEIA_MPV_HELPER_VERSION = "2025-12-19"
@@ -72,8 +73,6 @@ READY_PROP = "user-data/medeia-pipeline-ready"
OBS_ID_REQUEST = 1001
def _run_pipeline(pipeline_text: str, *, seeds: Any = None) -> Dict[str, Any]:
# Import after sys.path fix.
@@ -181,7 +180,13 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
except Exception:
pass
cmd = [py, str((_repo_root() / "CLI.py").resolve()), "pipeline", "--pipeline", pipeline_text]
cmd = [
py,
str((_repo_root() / "CLI.py").resolve()),
"pipeline",
"--pipeline",
pipeline_text,
]
if seeds is not None:
try:
cmd.extend(["--seeds-json", json.dumps(seeds, ensure_ascii=False)])
@@ -265,7 +270,7 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
name = None
if isinstance(instance_cfg, dict):
name = instance_cfg.get("NAME") or instance_cfg.get("name")
candidate = (str(name or instance_key or "").strip())
candidate = str(name or instance_key or "").strip()
if candidate:
seen.add(candidate)
choices = sorted(seen)
@@ -419,7 +424,11 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
if dump and dump != "0" and isinstance(info, dict):
h = hashlib.sha1(url.encode("utf-8", errors="replace")).hexdigest()[:10]
out_path = _repo_root() / "Log" / f"ytdlp-probe-{h}.json"
out_path.write_text(json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8", errors="replace")
out_path.write_text(
json.dumps(info, ensure_ascii=False, indent=2),
encoding="utf-8",
errors="replace",
)
_append_helper_log(f"[ytdlp-formats] wrote probe json: {out_path}")
except Exception:
pass
@@ -610,7 +619,7 @@ def main(argv: Optional[list[str]] = None) -> int:
if debug_enabled:
logging.basicConfig(
level=logging.DEBUG,
format='[%(name)s] %(levelname)s: %(message)s',
format="[%(name)s] %(levelname)s: %(message)s",
stream=sys.stderr,
)
for noisy in ("httpx", "httpcore", "httpcore.http11", "httpcore.connection"):
@@ -628,13 +637,17 @@ def main(argv: Optional[list[str]] = None) -> int:
# Ensure single helper instance per ipc.
_lock_fh = _acquire_ipc_lock(str(args.ipc))
if _lock_fh is None:
_append_helper_log(f"[helper] another instance already holds lock for ipc={args.ipc}; exiting")
_append_helper_log(
f"[helper] another instance already holds lock for ipc={args.ipc}; exiting"
)
return 0
try:
_append_helper_log(f"[helper] version={MEDEIA_MPV_HELPER_VERSION} started ipc={args.ipc}")
try:
_append_helper_log(f"[helper] file={Path(__file__).resolve()} cwd={Path.cwd().resolve()}")
_append_helper_log(
f"[helper] file={Path(__file__).resolve()} cwd={Path.cwd().resolve()}"
)
except Exception:
pass
try:
@@ -651,6 +664,7 @@ def main(argv: Optional[list[str]] = None) -> int:
# Route SYS.logger output into the helper log file so diagnostics are not
# lost in mpv's console/terminal output.
try:
class _HelperLogStream:
def __init__(self) -> None:
self._pending = ""
@@ -726,7 +740,9 @@ def main(argv: Optional[list[str]] = None) -> int:
last_connect_error = f"{type(exc).__name__}: {exc}"
if time.time() > connect_deadline:
_append_helper_log(f"[helper] failed to connect ipc={args.ipc} error={last_connect_error or 'timeout'}")
_append_helper_log(
f"[helper] failed to connect ipc={args.ipc} error={last_connect_error or 'timeout'}"
)
return 2
# Keep trying.
@@ -789,31 +805,50 @@ def main(argv: Optional[list[str]] = None) -> int:
# can read immediately without waiting for a request/response cycle (which may timeout).
try:
startup_choices_payload = _run_op("store-choices", None)
startup_choices = startup_choices_payload.get("choices") if isinstance(startup_choices_payload, dict) else None
startup_choices = (
startup_choices_payload.get("choices")
if isinstance(startup_choices_payload, dict)
else None
)
if isinstance(startup_choices, list):
preview = ", ".join(str(x) for x in startup_choices[:50])
_append_helper_log(f"[helper] startup store-choices count={len(startup_choices)} items={preview}")
_append_helper_log(
f"[helper] startup store-choices count={len(startup_choices)} items={preview}"
)
# Publish to a cached property for Lua to read without IPC request.
try:
cached_json = json.dumps({"success": True, "choices": startup_choices}, ensure_ascii=False)
client.send_command_no_wait(["set_property_string", "user-data/medeia-store-choices-cached", cached_json])
_append_helper_log(f"[helper] published store-choices to user-data/medeia-store-choices-cached")
cached_json = json.dumps(
{"success": True, "choices": startup_choices}, ensure_ascii=False
)
client.send_command_no_wait(
["set_property_string", "user-data/medeia-store-choices-cached", cached_json]
)
_append_helper_log(
f"[helper] published store-choices to user-data/medeia-store-choices-cached"
)
except Exception as exc:
_append_helper_log(f"[helper] failed to publish store-choices: {type(exc).__name__}: {exc}")
_append_helper_log(
f"[helper] failed to publish store-choices: {type(exc).__name__}: {exc}"
)
else:
_append_helper_log("[helper] startup store-choices unavailable")
except Exception as exc:
_append_helper_log(f"[helper] startup store-choices failed: {type(exc).__name__}: {exc}")
# Also publish config temp directory if available
try:
from config import load_config
cfg = load_config()
temp_dir = cfg.get("temp", "").strip() or os.getenv("TEMP") or "/tmp"
if temp_dir:
client.send_command_no_wait(["set_property_string", "user-data/medeia-config-temp", temp_dir])
_append_helper_log(f"[helper] published config temp to user-data/medeia-config-temp={temp_dir}")
client.send_command_no_wait(
["set_property_string", "user-data/medeia-config-temp", temp_dir]
)
_append_helper_log(
f"[helper] published config temp to user-data/medeia-config-temp={temp_dir}"
)
except Exception as exc:
_append_helper_log(f"[helper] failed to publish config temp: {type(exc).__name__}: {exc}")
@@ -854,7 +889,9 @@ def main(argv: Optional[list[str]] = None) -> int:
if "quic" in lower_prefix and "DEBUG:" in text:
continue
# Suppress progress-bar style lines (keep true errors).
if ("ETA" in text or "%" in text) and ("ERROR:" not in text and "WARNING:" not in text):
if ("ETA" in text or "%" in text) and (
"ERROR:" not in text and "WARNING:" not in text
):
# Typical yt-dlp progress bar line.
if text.lstrip().startswith("["):
continue
@@ -966,7 +1003,9 @@ def main(argv: Optional[list[str]] = None) -> int:
try:
# IMPORTANT: don't wait for a response here; waiting would consume
# async events and can drop/skip property-change notifications.
client.send_command_no_wait(["set_property_string", RESPONSE_PROP, json.dumps(resp, ensure_ascii=False)])
client.send_command_no_wait(
["set_property_string", RESPONSE_PROP, json.dumps(resp, ensure_ascii=False)]
)
except Exception:
# If posting results fails, there's nothing more useful to do.
pass
+58 -27
View File
@@ -95,12 +95,18 @@ class AllDebrid(Provider):
return None
# Quiet mode when download-file is mid-pipeline.
quiet = bool(self.config.get("_quiet_background_output")) if isinstance(self.config, dict) else False
quiet = (
bool(self.config.get("_quiet_background_output"))
if isinstance(self.config, dict)
else False
)
unlocked_url = target
try:
unlocked = client.unlock_link(target)
if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
if isinstance(unlocked, str) and unlocked.strip().startswith(
("http://", "https://")
):
unlocked_url = unlocked.strip()
except Exception as exc:
# Fall back to the raw link, but warn.
@@ -136,7 +142,11 @@ class AllDebrid(Provider):
try:
if downloaded_path.exists():
size = downloaded_path.stat().st_size
if size > 0 and size <= 250_000 and downloaded_path.suffix.lower() not in (".html", ".htm"):
if (
size > 0
and size <= 250_000
and downloaded_path.suffix.lower() not in (".html", ".htm")
):
head = downloaded_path.read_bytes()[:512]
try:
text = head.decode("utf-8", errors="ignore").lower()
@@ -147,7 +157,10 @@ class AllDebrid(Provider):
downloaded_path.unlink()
except Exception:
pass
log("[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.", file=sys.stderr)
log(
"[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
file=sys.stderr,
)
return None
except Exception:
pass
@@ -160,7 +173,9 @@ class AllDebrid(Provider):
return None
@staticmethod
def _flatten_files(items: Any, *, _prefix: Optional[List[str]] = None) -> Iterable[Dict[str, Any]]:
def _flatten_files(
items: Any, *, _prefix: Optional[List[str]] = None
) -> Iterable[Dict[str, Any]]:
"""Flatten AllDebrid magnet file tree into file dicts, preserving relative paths.
API commonly returns:
@@ -185,17 +200,17 @@ class AllDebrid(Provider):
if not isinstance(node, dict):
continue
children = node.get('e') or node.get('children')
children = node.get("e") or node.get("children")
if isinstance(children, list):
folder_name = node.get('n') or node.get('name')
folder_name = node.get("n") or node.get("name")
next_prefix = prefix
if isinstance(folder_name, str) and folder_name.strip():
next_prefix = prefix + [folder_name.strip()]
yield from AllDebrid._flatten_files(children, _prefix=next_prefix)
continue
name = node.get('n') or node.get('name')
link = node.get('l') or node.get('link')
name = node.get("n") or node.get("name")
link = node.get("l") or node.get("link")
if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
rel_parts = prefix + [name.strip()]
relpath = "/".join([p for p in rel_parts if p])
@@ -253,10 +268,15 @@ class AllDebrid(Provider):
except Exception:
magnet_status = {}
magnet_name = str(magnet_status.get('filename') or magnet_status.get('name') or magnet_status.get('hash') or f"magnet-{magnet_id}")
status_code = magnet_status.get('statusCode')
status_text = str(magnet_status.get('status') or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get('ready'))
magnet_name = str(
magnet_status.get("filename")
or magnet_status.get("name")
or magnet_status.get("hash")
or f"magnet-{magnet_id}"
)
status_code = magnet_status.get("statusCode")
status_text = str(magnet_status.get("status") or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get("ready"))
if not ready:
return [
@@ -280,18 +300,23 @@ class AllDebrid(Provider):
try:
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
file_tree = magnet_files.get('files', []) if isinstance(magnet_files, dict) else []
magnet_files = (
files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
)
file_tree = magnet_files.get("files", []) if isinstance(magnet_files, dict) else []
except Exception as exc:
log(f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}", file=sys.stderr)
log(
f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}",
file=sys.stderr,
)
file_tree = []
results: List[SearchResult] = []
for file_node in self._flatten_files(file_tree):
file_name = str(file_node.get('n') or file_node.get('name') or '').strip()
file_url = str(file_node.get('l') or file_node.get('link') or '').strip()
relpath = str(file_node.get('_relpath') or file_name or '').strip()
file_size = file_node.get('s') or file_node.get('size')
file_name = str(file_node.get("n") or file_node.get("name") or "").strip()
file_url = str(file_node.get("l") or file_node.get("link") or "").strip()
relpath = str(file_node.get("_relpath") or file_name or "").strip()
file_size = file_node.get("s") or file_node.get("size")
if not file_name or not file_url:
continue
@@ -356,16 +381,21 @@ class AllDebrid(Provider):
continue
try:
magnet_id = int(magnet.get('id'))
magnet_id = int(magnet.get("id"))
except Exception:
continue
magnet_name = str(magnet.get('filename') or magnet.get('name') or magnet.get('hash') or f"magnet-{magnet_id}")
magnet_name = str(
magnet.get("filename")
or magnet.get("name")
or magnet.get("hash")
or f"magnet-{magnet_id}"
)
magnet_name_lower = magnet_name.lower()
status_text = str(magnet.get('status') or "").strip() or "unknown"
status_code = magnet.get('statusCode')
ready = status_code == 4 or bool(magnet.get('ready'))
status_text = str(magnet.get("status") or "").strip() or "unknown"
status_code = magnet.get("statusCode")
ready = status_code == 4 or bool(magnet.get("ready"))
if wanted_id is not None:
if magnet_id != wanted_id:
@@ -375,7 +405,7 @@ class AllDebrid(Provider):
size_bytes: Optional[int] = None
try:
size_val = magnet.get('size')
size_val = magnet.get("size")
if isinstance(size_val, (int, float)):
size_bytes = int(size_val)
elif isinstance(size_val, str) and size_val.isdigit():
@@ -392,7 +422,8 @@ class AllDebrid(Provider):
annotations=["folder"],
media_kind="folder",
size_bytes=size_bytes,
tag={"alldebrid", "folder", str(magnet_id)} | ({"ready"} if ready else {"not-ready"}),
tag={"alldebrid", "folder", str(magnet_id)}
| ({"ready"} if ready else {"not-ready"}),
columns=[
("Folder", magnet_name),
("ID", str(magnet_id)),
+23 -7
View File
@@ -38,7 +38,9 @@ class Bandcamp(Provider):
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
def _scrape_artist_page(self, page: Any, artist_url: str, limit: int = 50) -> List[SearchResult]:
def _scrape_artist_page(
self, page: Any, artist_url: str, limit: int = 50
) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
@@ -74,13 +76,15 @@ class Bandcamp(Provider):
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title") or item.query_selector(".title")
title = (title_node.inner_text().strip() if title_node else "")
title = title_node.inner_text().strip() if title_node else ""
if title:
title = " ".join(title.split())
if not title:
title = target.rsplit("/", 1)[-1]
kind = "album" if "/album/" in target else ("track" if "/track/" in target else "item")
kind = (
"album" if "/album/" in target else ("track" if "/track/" in target else "item")
)
results.append(
SearchResult(
@@ -107,7 +111,9 @@ class Bandcamp(Provider):
return results
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Handle Bandcamp `@N` selection.
If the selected item is an ARTIST result, selecting it auto-expands into
@@ -155,12 +161,16 @@ class Bandcamp(Provider):
continue
title = str(payload.get("title") or "").strip()
url_val = str(payload.get("url") or payload.get("path") or meta.get("url") or "").strip()
url_val = str(
payload.get("url") or payload.get("path") or meta.get("url") or ""
).strip()
base = self._base_url(url_val)
if not base:
continue
chosen.append({"title": title, "url": base, "location": str(meta.get("artist") or "").strip()})
chosen.append(
{"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}
)
if not chosen:
return False
@@ -198,7 +208,13 @@ class Bandcamp(Provider):
try:
results_payload.append(r.to_dict())
except Exception:
results_payload.append({"table": "bandcamp", "title": getattr(r, "title", ""), "path": getattr(r, "path", "")})
results_payload.append(
{
"table": "bandcamp",
"title": getattr(r, "title", ""),
"path": getattr(r, "path", ""),
}
)
try:
ctx.set_last_result_table(table, results_payload)
+140 -128
View File
@@ -9,159 +9,171 @@ from SYS.logger import log
def _pick_provider_config(config: Any) -> Dict[str, Any]:
    """Return the ``provider -> "file.io"`` section of *config*.

    Args:
        config: The application configuration; anything that is not a dict
            is treated as "no configuration".

    Returns:
        The dict stored at ``config["provider"]["file.io"]``, or an empty
        dict when any level is missing or is not a dict.
    """
    if not isinstance(config, dict):
        return {}
    provider = config.get("provider")
    if not isinstance(provider, dict):
        return {}
    entry = provider.get("file.io")
    if isinstance(entry, dict):
        return entry
    return {}
def _extract_link(payload: Any) -> Optional[str]:
    """Find the first http(s) link in an upload response payload.

    Checks the keys ``link``, ``url``, ``downloadLink`` and ``download_url``
    of *payload*, then recurses into the ``data``, ``file`` and ``result``
    sub-objects, in that order.

    Args:
        payload: Decoded response body; anything that is not a dict yields
            ``None``.

    Returns:
        The stripped URL string, or ``None`` when no value starting with
        ``http://`` or ``https://`` is found.
    """
    if isinstance(payload, dict):
        for key in ("link", "url", "downloadLink", "download_url"):
            val = payload.get(key)
            if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
                return val.strip()

        for nested_key in ("data", "file", "result"):
            nested = payload.get(nested_key)
            found = _extract_link(nested)
            if found:
                return found

    return None
def _extract_key(payload: Any) -> Optional[str]:
    """Find the first non-empty file identifier in an upload response payload.

    Checks the keys ``key``, ``id`` and ``uuid`` of *payload*, then recurses
    into the ``data``, ``file`` and ``result`` sub-objects, in that order.

    Args:
        payload: Decoded response body; anything that is not a dict yields
            ``None``.

    Returns:
        The stripped identifier string, or ``None`` when no non-blank string
        value is found.
    """
    if isinstance(payload, dict):
        for key in ("key", "id", "uuid"):
            val = payload.get(key)
            if isinstance(val, str) and val.strip():
                return val.strip()

        for nested_key in ("data", "file", "result"):
            nested = payload.get(nested_key)
            found = _extract_key(nested)
            if found:
                return found

    return None
class FileIO(Provider):
    """File provider for file.io."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Read file.io settings from the ``provider -> "file.io"`` config section.

        Recognized keys: ``base_url`` (default ``https://file.io``),
        ``api_key``, ``expires``, ``maxDownloads`` / ``max_downloads`` and
        ``autoDelete`` / ``auto_delete``. For the last two, the camelCase
        spelling is used unless its value is ``None``.
        """
        super().__init__(config)
        conf = _pick_provider_config(self.config)
        self._base_url = str(conf.get("base_url") or "https://file.io").strip().rstrip("/")
        self._api_key = conf.get("api_key")
        self._default_expires = conf.get("expires")
        self._default_max_downloads = conf.get("maxDownloads")
        if self._default_max_downloads is None:
            self._default_max_downloads = conf.get("max_downloads")
        self._default_auto_delete = conf.get("autoDelete")
        if self._default_auto_delete is None:
            self._default_auto_delete = conf.get("auto_delete")

    def validate(self) -> bool:
        """Always report the provider as usable."""
        return True

    def upload(self, file_path: str, **kwargs: Any) -> str:
        """Upload *file_path* to ``<base_url>/upload`` and return its URL.

        Args:
            file_path: Path of the local file to upload.
            **kwargs: Optional per-call overrides ``expires``,
                ``maxDownloads`` / ``max_downloads`` and
                ``autoDelete`` / ``auto_delete`` (falling back to the
                configured defaults), plus ``pipe_obj``; when given, the
                resulting URL is recorded for it via ``Store`` on a
                best-effort basis.

        Returns:
            The download URL: a link found in the JSON payload, else a
            plain-text URL body, else ``<base_url>/<key>`` built from a key
            found in the payload.

        Raises:
            FileNotFoundError: If *file_path* does not exist.
            Exception: On an HTTP status >= 400, an HTML response, a payload
                with ``success`` set to false, or a response without any
                usable link. Every error raised after the existence check is
                logged to stderr and re-raised.
        """
        from API.HTTP import HTTPClient
        from models import ProgressFileReader

        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        data: Dict[str, Any] = {}
        expires = kwargs.get("expires", self._default_expires)
        max_downloads = kwargs.get(
            "maxDownloads", kwargs.get("max_downloads", self._default_max_downloads)
        )
        auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))

        # Only send the options that were actually set.
        if expires not in (None, ""):
            data["expires"] = expires
        if max_downloads not in (None, ""):
            data["maxDownloads"] = max_downloads
        if auto_delete not in (None, ""):
            data["autoDelete"] = auto_delete

        headers: Dict[str, str] = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
        if isinstance(self._api_key, str) and self._api_key.strip():
            # Some file.io plans use bearer tokens; keep optional.
            headers["Authorization"] = f"Bearer {self._api_key.strip()}"

        try:
            with HTTPClient(headers=headers) as client:
                with open(file_path, "rb") as handle:
                    filename = os.path.basename(file_path)
                    try:
                        total = os.path.getsize(file_path)
                    except Exception:
                        total = None
                    wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
                    response = client.request(
                        "POST",
                        f"{self._base_url}/upload",
                        data=data or None,
                        files={"file": (filename, wrapped)},
                        follow_redirects=True,
                        raise_for_status=False,
                    )

                if response.status_code >= 400:
                    location = response.headers.get("location") or response.headers.get("Location")
                    ct = response.headers.get("content-type") or response.headers.get("Content-Type")
                    raise Exception(
                        f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
                    )

                payload: Any
                try:
                    payload = response.json()
                except Exception:
                    payload = None

                # If the server ignored our Accept header and returned HTML, this is almost
                # certainly the wrong endpoint or an upstream block.
                ct = (
                    response.headers.get("content-type") or response.headers.get("Content-Type") or ""
                ).lower()
                if (payload is None) and ("text/html" in ct):
                    raise Exception(
                        "file.io returned HTML instead of JSON; expected API response from /upload"
                    )

                if isinstance(payload, dict) and payload.get("success") is False:
                    reason = payload.get("message") or payload.get("error") or payload.get("status")
                    raise Exception(str(reason or "Upload failed"))

                uploaded_url = _extract_link(payload)
                if not uploaded_url:
                    # Some APIs may return the link as plain text.
                    text = str(response.text or "").strip()
                    if text.startswith(("http://", "https://")):
                        uploaded_url = text

                if not uploaded_url:
                    # Last resort: build the URL from a file key in the payload.
                    key = _extract_key(payload)
                    if key:
                        uploaded_url = f"{self._base_url}/{key.lstrip('/')}"

                if not uploaded_url:
                    try:
                        snippet = (response.text or "").strip()
                        if len(snippet) > 300:
                            snippet = snippet[:300] + "..."
                    except Exception:
                        snippet = "<unreadable response>"
                    raise Exception(
                        f"Upload succeeded but response did not include a link (response: {snippet})"
                    )

                # Best-effort bookkeeping: a failure to record the URL must
                # not turn a successful upload into an error.
                try:
                    pipe_obj = kwargs.get("pipe_obj")
                    if pipe_obj is not None:
                        from Store import Store

                        Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
                            pipe_obj, uploaded_url
                        )
                except Exception:
                    pass

                return uploaded_url

        except Exception as exc:
            log(f"[file.io] Upload error: {exc}", file=sys.stderr)
            raise
+17 -8
View File
@@ -110,7 +110,12 @@ def is_download_file_url(url: str) -> bool:
if not host.endswith("archive.org"):
return False
# /download/<identifier>/<filename>
return len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip()) and bool(parts[2].strip())
return (
len(parts) >= 3
and parts[0].lower() == "download"
and bool(parts[1].strip())
and bool(parts[2].strip())
)
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
@@ -482,12 +487,14 @@ class InternetArchive(Provider):
name = f.get("name")
if not name:
continue
files.append({
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
})
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
@@ -657,7 +664,9 @@ class InternetArchive(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, item_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, item_url
)
except Exception:
pass
+157 -47
View File
@@ -111,7 +111,19 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
score = 0
for ln in lines:
lo = ln.lower()
if ":" in ln and any(k in lo for k in ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")):
if ":" in ln and any(
k in lo
for k in (
"title",
"author",
"publisher",
"year",
"isbn",
"language",
"series",
"tags",
)
):
score += 1
if score > best_score:
best_score = score
@@ -260,7 +272,9 @@ def _prefer_isbn(isbns: List[str]) -> str:
return vals[0] if vals else ""
def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] = None) -> Tuple[List[str], str]:
def _enrich_book_tags_from_isbn(
isbn: str, *, config: Optional[Dict[str, Any]] = None
) -> Tuple[List[str], str]:
"""Return (tags, source_name) for the given ISBN.
Priority:
@@ -378,7 +392,9 @@ def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] =
return [], ""
def _fetch_libgen_details_html(url: str, *, timeout: Optional[Tuple[float, float]] = None) -> Optional[str]:
def _fetch_libgen_details_html(
url: str, *, timeout: Optional[Tuple[float, float]] = None
) -> Optional[str]:
try:
if timeout is None:
timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
@@ -450,7 +466,9 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
label = label[:-1].strip()
chunk_start = m.end()
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
)
raw_val_html = s[chunk_start:chunk_end]
# If we already have a value for this label from a table row, keep it.
@@ -600,7 +618,19 @@ def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author(s)", "authors", "author", "publisher", "year", "isbn", "language", "oclc/worldcat", "tags", "edition id"}:
if lk in {
"title",
"author(s)",
"authors",
"author",
"publisher",
"year",
"isbn",
"language",
"oclc/worldcat",
"tags",
"edition id",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -755,7 +785,15 @@ class Libgen(Provider):
if title and title.startswith("http"):
title = ""
base_name = sanitize_filename(title or md5 or (f"libgen_{_libgen_id_from_url(target)}" if _libgen_id_from_url(target) else "libgen"))
base_name = sanitize_filename(
title
or md5
or (
f"libgen_{_libgen_id_from_url(target)}"
if _libgen_id_from_url(target)
else "libgen"
)
)
out_path = output_dir / base_name
if extension:
out_path = out_path.with_suffix(f".{extension}")
@@ -782,14 +820,23 @@ class Libgen(Provider):
return
total = int(content_length) if content_length and content_length > 0 else None
downloaded = int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
downloaded = (
int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
)
elapsed = max(0.001, now - start_time)
speed = downloaded / elapsed
progress_bar.update(downloaded=downloaded, total=total, label=str(label or "download"), file=sys.stderr)
progress_bar.update(
downloaded=downloaded,
total=total,
label=str(label or "download"),
file=sys.stderr,
)
last_progress_time[0] = now
ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback)
ok, final_path = download_from_mirror(
target, out_path, progress_callback=progress_callback
)
progress_bar.finish()
if ok and final_path:
# After the download completes, best-effort fetch details metadata (title + ISBN)
@@ -802,9 +849,13 @@ class Libgen(Provider):
# Parse it post-download (best-effort) and do NOT perform external
# enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
if ("/ads.php" in low) or ("/get.php" in low):
ads_url = target if "/ads.php" in low else _libgen_ads_url_for_target(target)
ads_url = (
target if "/ads.php" in low else _libgen_ads_url_for_target(target)
)
if ads_url:
html = _fetch_libgen_details_html(ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0))
html = _fetch_libgen_details_html(
ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)
)
if html:
meta = _parse_libgen_ads_tags_html(html)
extracted_title = str(meta.get("title") or "").strip()
@@ -814,8 +865,12 @@ class Libgen(Provider):
if (not title) or title.startswith("http"):
title = extracted_title
authors = meta.get("authors") if isinstance(meta.get("authors"), list) else []
for a in (authors or []):
authors = (
meta.get("authors")
if isinstance(meta.get("authors"), list)
else []
)
for a in authors or []:
aa = str(a or "").strip()
if aa:
result.tag.add(f"author:{aa}")
@@ -835,15 +890,25 @@ class Libgen(Provider):
md["language"] = language
result.tag.add(f"language:{language}")
isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
isbns = [str(x).strip() for x in (isbns or []) if str(x).strip()]
isbns = (
meta.get("isbn")
if isinstance(meta.get("isbn"), list)
else []
)
isbns = [
str(x).strip() for x in (isbns or []) if str(x).strip()
]
if isbns:
md["isbn"] = isbns
for isbn_val in isbns:
result.tag.add(f"isbn:{isbn_val}")
free_tags = meta.get("tags") if isinstance(meta.get("tags"), list) else []
for t in (free_tags or []):
free_tags = (
meta.get("tags")
if isinstance(meta.get("tags"), list)
else []
)
for t in free_tags or []:
tt = str(t or "").strip()
if tt:
result.tag.add(tt)
@@ -853,7 +918,16 @@ class Libgen(Provider):
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author", "authors", "publisher", "year", "isbn", "language", "tags"}:
if lk in {
"title",
"author",
"authors",
"publisher",
"year",
"isbn",
"language",
"tags",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -863,7 +937,11 @@ class Libgen(Provider):
result.tag.add(f"libgen_{ns}:{vv}")
# Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
if ("/edition.php" in low) or ("/file.php" in low) or ("/series.php" in low):
if (
("/edition.php" in low)
or ("/file.php" in low)
or ("/series.php" in low)
):
html = _fetch_libgen_details_html(target)
if html:
meta = _parse_libgen_details_html(html)
@@ -874,8 +952,14 @@ class Libgen(Provider):
meta["edition_id"] = eid
extracted_title = str(meta.get("title") or "").strip()
extracted_isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
extracted_isbns = [str(x).strip() for x in (extracted_isbns or []) if str(x).strip()]
extracted_isbns = (
meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
)
extracted_isbns = [
str(x).strip()
for x in (extracted_isbns or [])
if str(x).strip()
]
if extracted_title:
md["title"] = extracted_title
@@ -955,9 +1039,11 @@ class LibgenSearch:
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
self.session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
)
def _search_libgen_json(
self,
@@ -1005,20 +1091,22 @@ class LibgenSearch:
download_link = f"http://library.lol/main/{md5}" if md5 else ""
results.append({
"id": str(raw_id),
"title": str(title),
"author": str(author),
"publisher": str(publisher),
"year": str(year),
"pages": str(pages),
"language": str(language),
"filesize_str": str(size),
"extension": str(extension),
"md5": str(md5),
"mirror_url": download_link,
"cover": "",
})
results.append(
{
"id": str(raw_id),
"title": str(title),
"author": str(author),
"publisher": str(publisher),
"year": str(year),
"pages": str(pages),
"language": str(language),
"filesize_str": str(size),
"extension": str(extension),
"md5": str(md5),
"mirror_url": download_link,
"cover": "",
}
)
if len(results) >= limit:
break
@@ -1063,7 +1151,9 @@ class LibgenSearch:
# Try JSON first on *all* mirrors (including .gl/.li), then fall back to HTML scraping.
results: List[Dict[str, Any]] = []
try:
results = self._search_libgen_json(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_json(
mirror, query, limit, timeout=request_timeout
)
except Exception:
results = []
@@ -1072,9 +1162,13 @@ class LibgenSearch:
continue
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_li(
mirror, query, limit, timeout=request_timeout
)
else:
results = self._search_libgen_rs(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_rs(
mirror, query, limit, timeout=request_timeout
)
if results:
_call(log_info, f"[libgen] Using mirror: {mirror}")
@@ -1477,28 +1571,40 @@ def _resolve_download_url(
# get.php?md5=... -> file response
# Handle edition -> file links.
m = re.search(r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle series -> edition links.
m = re.search(r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle file -> ads/get links (sometimes present as the "Libgen" mirror).
m = re.search(r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Prefer explicit get.php md5 links (most common successful chain).
m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
@@ -1540,7 +1646,9 @@ def _resolve_download_url(
_call(log_info, f"[resolve] Checking: {current_url}")
if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")):
if current_url.lower().endswith(
(".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")
):
return current_url
try:
@@ -1618,7 +1726,9 @@ def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Opt
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE)
match = re.search(
r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE
)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
+3 -1
View File
@@ -56,7 +56,9 @@ class LOC(Provider):
page_size = max(1, min(int(limit), 50))
while len(results) < max(0, int(limit)):
payload = client.search_chronicling_america(q, start=start, count=page_size, extra_params=extra)
payload = client.search_chronicling_america(
q, start=start, count=page_size, extra_params=extra
)
items = payload.get("results")
if not isinstance(items, list) or not items:
break
+443 -387
View File
@@ -16,457 +16,513 @@ _MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}
def _sniff_mime_from_header(path: Path) -> Optional[str]:
    """Best-effort MIME sniffing from file headers.

    Used when the file has no/unknown extension (common for exported/temp files).
    Keeps dependencies to stdlib only.

    Args:
        path: File to inspect. Only its first 512 bytes are read.

    Returns:
        The MIME type of the first matching signature, or ``None`` when the
        path is missing, not a regular file, empty, unreadable, or unrecognized.
    """
    try:
        if not path.exists() or not path.is_file():
            return None
        with open(path, "rb") as handle:
            header = handle.read(512)
    except Exception:
        # Sniffing is best-effort: any failure simply means "unknown".
        return None

    if not header:
        return None

    # RIFF is shared by WebP, WAVE and AVI; the form type lives in bytes 8..11.
    riff_form = header[8:12] if header.startswith(b"RIFF") and len(header) >= 12 else b""

    # Images
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if header.startswith(b"BM"):
        return "image/bmp"
    if riff_form == b"WEBP":
        return "image/webp"

    # Audio
    if header.startswith(b"fLaC"):
        return "audio/flac"
    if header.startswith(b"OggS"):
        # Could be audio or video; treat as audio unless extension suggests video.
        return "audio/ogg"
    if header.startswith(b"ID3"):
        return "audio/mpeg"
    # MPEG audio frame sync: eleven set bits at the start of the stream.
    if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
        return "audio/mpeg"
    if riff_form == b"WAVE":
        return "audio/wav"

    # Video
    if riff_form == b"AVI ":
        return "video/x-msvideo"
    if header.startswith(b"\x1a\x45\xdf\xa3"):
        # EBML container: Matroska/WebM.
        return "video/x-matroska"
    if len(header) >= 12 and header[4:8] == b"ftyp":
        # ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
        return "video/mp4"

    # MPEG-TS / M2TS (sync byte every 188 bytes). The second sync byte is
    # already inside the 512-byte header, so no second read is needed; at
    # least two full packets must be present.
    if len(header) >= 188 * 2 and header[0] == 0x47 and header[188] == 0x47:
        return "video/mp2t"

    return None
def _classify_matrix_upload(
    path: Path, *, explicit_mime_type: Optional[str] = None
) -> Tuple[str, str]:
    """Return (mime_type, msgtype) for Matrix uploads.

    The MIME type is resolved in this order: the caller-supplied
    ``explicit_mime_type``, ``mimetypes.guess_type`` on the file name,
    header sniffing, and finally a small extension table for ambiguous
    containers. The message type is derived from the MIME type's major
    type, falling back to the file extension.

    Args:
        path: File that is going to be uploaded.
        explicit_mime_type: MIME type to use instead of detecting one;
            blank values are ignored.

    Returns:
        ``(mime_type, msgtype)`` where ``mime_type`` defaults to
        ``"application/octet-stream"`` and ``msgtype`` is one of
        ``"m.image"``, ``"m.audio"``, ``"m.video"`` or ``"m.file"``.
    """
    mime_type = str(explicit_mime_type or "").strip() or None

    if not mime_type:
        # `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
        mime_type, _ = mimetypes.guess_type(str(path))

    if not mime_type:
        mime_type = _sniff_mime_from_header(path)

    # Refinements based on extension for ambiguous containers.
    # NOTE(review): these only apply when nothing above produced a type, so a
    # sniffed "video/mp4" is kept for a ".m4a" file - confirm that is intended.
    ext = path.suffix.lower()
    container_defaults = {
        ".m4a": "audio/mp4",
        ".aac": "audio/mp4",
        ".mkv": "video/x-matroska",
        ".webm": "video/x-matroska",
        ".ogv": "video/ogg",
    }
    if not mime_type:
        mime_type = container_defaults.get(ext)

    msgtype = "m.file"
    if mime_type:
        mt = mime_type.casefold()
        if mt.startswith("image/"):
            msgtype = "m.image"
        elif mt.startswith("audio/"):
            msgtype = "m.audio"
        elif mt.startswith("video/"):
            msgtype = "m.video"

    # Final fallback for unknown MIME types.
    if msgtype == "m.file":
        audio_exts = {
            ".mp3",
            ".flac",
            ".wav",
            ".m4a",
            ".aac",
            ".ogg",
            ".opus",
            ".wma",
            ".mka",
            ".alac",
        }
        video_exts = {
            ".mp4",
            ".mkv",
            ".webm",
            ".mov",
            ".avi",
            ".flv",
            ".mpg",
            ".mpeg",
            ".ts",
            ".m4v",
            ".wmv",
            ".m2ts",
            ".mts",
            ".3gp",
            ".ogv",
        }
        image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
        if ext in audio_exts:
            msgtype = "m.audio"
        elif ext in video_exts:
            msgtype = "m.video"
        elif ext in image_exts:
            msgtype = "m.image"

    return (mime_type or "application/octet-stream"), msgtype
def _normalize_homeserver(value: str) -> str:
    """Normalize a homeserver setting into a base URL.

    Args:
        value: Host name or URL as configured; ``None``/blank is tolerated.

    Returns:
        The URL without trailing slashes, with ``https://`` prepended when no
        ``http://``/``https://`` scheme is present, or ``""`` for blank input.
    """
    text = str(value or "").strip()
    if not text:
        return ""
    # Test for a real scheme prefix (case-insensitively): a bare
    # `startswith("http")` would also match host names such as
    # "httpbin.example.org" and leave them without a scheme.
    if not text.lower().startswith(("http://", "https://")):
        text = f"https://{text}"
    return text.rstrip("/")
def _matrix_health_check(
    *, homeserver: str, access_token: Optional[str]
) -> Tuple[bool, Optional[str]]:
    """Lightweight Matrix reachability/auth validation.

    - Always checks `/versions` (no auth).
    - If `access_token` is present, also checks `/whoami`.

    Args:
        homeserver: Homeserver host or URL; normalized before use.
        access_token: Bearer token to verify, or ``None`` to skip the auth check.

    Returns:
        ``(True, None)`` when every performed check answered HTTP 200,
        otherwise ``(False, reason)``. Exceptions are never propagated; they
        are reported as the reason string.
    """
    try:
        base = _normalize_homeserver(homeserver)
        if not base:
            return False, "Matrix homeserver missing"

        resp = requests.get(f"{base}/_matrix/client/versions", timeout=5)
        if resp.status_code != 200:
            return False, f"Homeserver returned {resp.status_code}"

        if access_token:
            headers = {"Authorization": f"Bearer {access_token}"}
            resp = requests.get(
                f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5
            )
            if resp.status_code != 200:
                return False, f"Authentication failed: {resp.status_code}"

        return True, None
    except Exception as exc:
        # Connection errors, timeouts, etc. are reported, not raised.
        return False, str(exc)
class Matrix(Provider):
"""File provider for Matrix (Element) chat rooms."""
"""File provider for Matrix (Element) chat rooms."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """Create the provider and, when configured, verify the homeserver once.

    Args:
        config: Application configuration; Matrix settings are read from
            ``config["provider"]["matrix"]``.

    Raises:
        Exception: If Matrix is configured but the health check failed
            (either just now or in a cached earlier attempt).
    """
    super().__init__(config)
    # None means "not checked" (Matrix is not configured).
    self._init_ok: Optional[bool] = None
    self._init_reason: Optional[str] = None

    matrix_conf = (
        self.config.get("provider", {}).get("matrix", {})
        if isinstance(self.config, dict)
        else {}
    )
    homeserver = matrix_conf.get("homeserver")
    access_token = matrix_conf.get("access_token")
    password = matrix_conf.get("password")

    # Not configured: keep instance but mark invalid via validate().
    # Note: `room_id` is intentionally NOT required, since the CLI can prompt
    # the user to select a room dynamically.
    if not (homeserver and (access_token or password)):
        return

    # The health check result is shared across instances through the
    # module-level cache, keyed by homeserver and whether a token is set.
    cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
    cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
    if cached is None:
        ok, reason = _matrix_health_check(
            homeserver=str(homeserver), access_token=str(access_token) if access_token else None
        )
        _MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
    else:
        ok, reason = cached

    self._init_ok = ok
    self._init_reason = reason
    if not ok:
        raise Exception(reason or "Matrix unavailable")
def validate(self) -> bool:
    """Report whether the Matrix provider is configured and usable.

    Returns:
        ``True`` when a homeserver plus an access token or password is
        configured and the init-time health check did not fail;
        ``False`` otherwise.
    """
    # Same guard as __init__: a non-dict config cannot hold Matrix settings.
    if not self.config or not isinstance(self.config, dict):
        return False
    if self._init_ok is False:
        return False
    matrix_conf = self.config.get("provider", {}).get("matrix", {})
    return bool(
        matrix_conf.get("homeserver")
        and (matrix_conf.get("access_token") or matrix_conf.get("password"))
    )
def _get_homeserver_and_token(self) -> Tuple[str, str]:
    """Return the normalized homeserver URL and the access token.

    Returns:
        ``(base_url, access_token)`` taken from ``config["provider"]["matrix"]``.

    Raises:
        Exception: If the homeserver or the access token is not configured.
    """
    matrix_conf = self.config.get("provider", {}).get("matrix", {})
    homeserver = matrix_conf.get("homeserver")
    access_token = matrix_conf.get("access_token")
    if not homeserver:
        raise Exception("Matrix homeserver missing")
    if not access_token:
        raise Exception("Matrix access_token missing")
    base = _normalize_homeserver(str(homeserver))
    # Normalization can still yield "" (e.g. a whitespace-only value).
    if not base:
        raise Exception("Matrix homeserver missing")
    return base, str(access_token)
def list_joined_room_ids(self) -> List[str]:
    """Return joined room IDs for the current user.

    Uses `GET /_matrix/client/v3/joined_rooms`.

    Returns:
        The room IDs from the response, stripped of surrounding whitespace;
        non-string and blank entries are dropped.

    Raises:
        Exception: If the homeserver/token is not configured or the request
            does not answer HTTP 200.
    """
    base, token = self._get_homeserver_and_token()
    headers = {"Authorization": f"Bearer {token}"}
    resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
    if resp.status_code != 200:
        raise Exception(f"Matrix joined_rooms failed: {resp.text}")
    data = resp.json() or {}
    rooms = data.get("joined_rooms") or []
    return [rid.strip() for rid in rooms if isinstance(rid, str) and rid.strip()]
def list_rooms(self, *, room_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """Return joined rooms, optionally limited to a subset.

    Performance note: room names require additional per-room HTTP requests.
    If `room_ids` is provided, only those rooms will have name lookups.

    Args:
        room_ids: Room IDs to keep, matched case-insensitively against
            either the full ID (``!id:hs``) or its short form (``!id``).
            ``None``/empty keeps every joined room.

    Returns:
        One ``{"room_id": ..., "name": ...}`` dict per room; ``name`` is
        ``""`` when the name lookup fails or the room has no name.

    Raises:
        Exception: If the homeserver/token is not configured or the joined
            rooms request fails.
    """
    base, token = self._get_homeserver_and_token()
    headers = {"Authorization": f"Bearer {token}"}

    joined = self.list_joined_room_ids()
    if room_ids:
        allowed = {str(v).strip().casefold() for v in room_ids if str(v).strip()}
        if allowed:
            # Accept either full IDs (!id:hs) or short IDs (!id).
            def _is_allowed(rid: str) -> bool:
                r = str(rid or "").strip()
                if not r:
                    return False
                rc = r.casefold()
                if rc in allowed:
                    return True
                short = r.split(":", 1)[0].strip().casefold()
                return bool(short) and short in allowed

            joined = [rid for rid in joined if _is_allowed(rid)]

    out: List[Dict[str, Any]] = []
    for room_id in joined:
        name = ""
        # Best-effort room name lookup (safe to fail).
        try:
            encoded = quote(room_id, safe="")
            name_resp = requests.get(
                f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name",
                headers=headers,
                timeout=5,
            )
            if name_resp.status_code == 200:
                payload = name_resp.json() or {}
                maybe = payload.get("name")
                if isinstance(maybe, str):
                    name = maybe
        except Exception:
            pass
        out.append({"room_id": room_id, "name": name})
    return out
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from models import ProgressFileReader
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
if not room_id:
raise Exception("Matrix room_id missing")
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from models import ProgressFileReader
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/octet-stream",
}
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
if not room_id:
raise Exception("Matrix room_id missing")
mime_type, msgtype = _classify_matrix_upload(path, explicit_mime_type=kwargs.get("mime_type"))
headers["Content-Type"] = mime_type
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/octet-stream",
}
filename = path.name
mime_type, msgtype = _classify_matrix_upload(
path, explicit_mime_type=kwargs.get("mime_type")
)
headers["Content-Type"] = mime_type
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(handle, total_bytes=int(path.stat().st_size), label="upload")
resp = requests.post(upload_url, headers=headers, data=wrapped, params={"filename": filename})
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
filename = path.name
# Build a fragment-free URL suitable for storage backends.
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
download_url_for_store = ""
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://"):]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
media_id = str(media_id).strip()
if server_name and media_id:
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
except Exception:
download_url_for_store = ""
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(
handle, total_bytes=int(path.stat().st_size), label="upload"
)
resp = requests.post(
upload_url, headers=headers, data=wrapped, params={"filename": filename}
)
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
# Build a fragment-free URL suitable for storage backends.
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
download_url_for_store = ""
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://") :]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
media_id = str(media_id).strip()
if server_name and media_id:
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
except Exception:
download_url_for_store = ""
# Correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
event_id = (send_resp.json() or {}).get("event_id")
link = f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
# Correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
event_id = (send_resp.json() or {}).get("event_id")
link = (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
)
# Prefer the direct media download URL for storage backends.
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj,
download_url_for_store or link,
)
except Exception:
pass
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
return link
# Prefer the direct media download URL for storage backends.
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj,
download_url_for_store or link,
)
except Exception:
pass
def send_text_to_room(self, text: str, room_id: str) -> str:
"""Send a plain text message to a specific room."""
message = str(text or "").strip()
if not message:
return ""
if not room_id:
raise Exception("Matrix room_id missing")
return link
base, token = self._get_homeserver_and_token()
encoded_room = quote(str(room_id), safe="")
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
payload = {"msgtype": "m.text", "body": message}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send text failed: {send_resp.text}")
def send_text_to_room(self, text: str, room_id: str) -> str:
"""Send a plain text message to a specific room."""
message = str(text or "").strip()
if not message:
return ""
if not room_id:
raise Exception("Matrix room_id missing")
event_id = (send_resp.json() or {}).get("event_id")
return f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
base, token = self._get_homeserver_and_token()
encoded_room = quote(str(room_id), safe="")
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
payload = {"msgtype": "m.text", "body": message}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send text failed: {send_resp.text}")
def upload(self, file_path: str, **kwargs: Any) -> str:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
room_id = matrix_conf.get("room_id")
if not room_id:
raise Exception("Matrix room_id missing")
return self.upload_to_room(file_path, str(room_id))
event_id = (send_resp.json() or {}).get("event_id")
return (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
)
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
"""Handle Matrix room selection via `@N`.
def upload(self, file_path: str, **kwargs: Any) -> str:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
room_id = matrix_conf.get("room_id")
if not room_id:
raise Exception("Matrix room_id missing")
return self.upload_to_room(file_path, str(room_id))
If the CLI has a pending upload stash, selecting a room triggers an upload.
"""
if not stage_is_last:
return False
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Handle Matrix room selection via `@N`.
pending = None
try:
pending = ctx.load_value('matrix_pending_uploads', default=None)
except Exception:
pending = None
If the CLI has a pending upload stash, selecting a room triggers an upload.
"""
if not stage_is_last:
return False
pending_list = list(pending) if isinstance(pending, list) else []
if not pending_list:
return False
pending = None
try:
pending = ctx.load_value("matrix_pending_uploads", default=None)
except Exception:
pending = None
room_ids: List[str] = []
for item in selected_items or []:
rid = None
if isinstance(item, dict):
rid = item.get('room_id') or item.get('id')
else:
rid = getattr(item, 'room_id', None) or getattr(item, 'id', None)
if rid and str(rid).strip():
room_ids.append(str(rid).strip())
pending_list = list(pending) if isinstance(pending, list) else []
if not pending_list:
return False
if not room_ids:
print("No Matrix room selected\n")
return True
room_ids: List[str] = []
for item in selected_items or []:
rid = None
if isinstance(item, dict):
rid = item.get("room_id") or item.get("id")
else:
rid = getattr(item, "room_id", None) or getattr(item, "id", None)
if rid and str(rid).strip():
room_ids.append(str(rid).strip())
any_failed = False
for room_id in room_ids:
for payload in pending_list:
try:
file_path = ''
delete_after = False
pipe_obj = None
if isinstance(payload, dict):
file_path = str(payload.get('path') or '')
delete_after = bool(payload.get('delete_after', False))
pipe_obj = payload.get('pipe_obj')
else:
file_path = str(getattr(payload, 'path', '') or '')
if not file_path:
any_failed = True
continue
if not room_ids:
print("No Matrix room selected\n")
return True
media_path = Path(file_path)
if not media_path.exists():
any_failed = True
print(f"Matrix upload file missing: {file_path}")
continue
any_failed = False
for room_id in room_ids:
for payload in pending_list:
try:
file_path = ""
delete_after = False
pipe_obj = None
if isinstance(payload, dict):
file_path = str(payload.get("path") or "")
delete_after = bool(payload.get("delete_after", False))
pipe_obj = payload.get("pipe_obj")
else:
file_path = str(getattr(payload, "path", "") or "")
if not file_path:
any_failed = True
continue
link = self.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
if link:
print(link)
media_path = Path(file_path)
if not media_path.exists():
any_failed = True
print(f"Matrix upload file missing: {file_path}")
continue
if delete_after:
try:
media_path.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if media_path.exists():
media_path.unlink()
except Exception:
pass
except Exception as exc:
any_failed = True
print(f"Matrix upload failed: {exc}")
link = self.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
if link:
print(link)
try:
ctx.store_value('matrix_pending_uploads', [])
except Exception:
pass
if delete_after:
try:
media_path.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if media_path.exists():
media_path.unlink()
except Exception:
pass
except Exception as exc:
any_failed = True
print(f"Matrix upload failed: {exc}")
if any_failed:
print("\nOne or more Matrix uploads failed\n")
return True
try:
ctx.store_value("matrix_pending_uploads", [])
except Exception:
pass
if any_failed:
print("\nOne or more Matrix uploads failed\n")
return True
+58 -47
View File
@@ -100,7 +100,10 @@ class OpenLibraryMetadataProvider(MetadataProvider):
try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (
10,
13,
):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
@@ -135,23 +138,25 @@ class OpenLibraryMetadataProvider(MetadataProvider):
if isinstance(key, str) and key:
olid = key.split("/")[-1]
items.append({
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
})
items.append(
{
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
}
)
return items
@@ -238,17 +243,19 @@ class GoogleBooksMetadataProvider(MetadataProvider):
else:
identifiers.setdefault(ident_type, ident_value)
items.append({
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
})
items.append(
{
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
}
)
return items
@@ -346,7 +353,9 @@ class ISBNsearchMetadataProvider(MetadataProvider):
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
@@ -389,7 +398,10 @@ class ISBNsearchMetadataProvider(MetadataProvider):
# Prefer parsed title, but fall back to og:title if needed.
if not title:
m_og = re.search(r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>", html)
m_og = re.search(
r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>",
html,
)
if m_og:
title = self._strip_html_to_text(m_og.group(1))
@@ -534,15 +546,17 @@ class MusicBrainzMetadataProvider(MetadataProvider):
year = str(release_date)[:4] if release_date else ""
mbid = rec.get("id") or ""
items.append({
"title": title,
"artist": artist,
"album": album,
"year": year,
"provider": self.name,
"mbid": mbid,
"raw": rec,
})
items.append(
{
"title": title,
"artist": artist,
"album": album,
"year": year,
"provider": self.name,
"mbid": mbid,
"raw": rec,
}
)
return items
@@ -624,12 +638,7 @@ class YtdlpMetadataProvider(MetadataProvider):
# Provide basic columns for the standard metadata selection table.
# NOTE: This is best-effort; many extractors don't provide artist/album.
artist = (
info.get("artist")
or info.get("uploader")
or info.get("channel")
or ""
)
artist = info.get("artist") or info.get("uploader") or info.get("channel") or ""
album = info.get("album") or info.get("playlist_title") or ""
title = info.get("title") or ""
@@ -734,7 +743,9 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
return availability
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
def get_metadata_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
+62 -20
View File
@@ -154,7 +154,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
return False, "api-error"
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
first = ia_candidates[0].strip()
@@ -420,18 +422,24 @@ class OpenLibrary(Provider):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
@@ -452,10 +460,14 @@ class OpenLibrary(Provider):
try:
token_json = token_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}"
)
if not token_json.get("success"):
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}"
)
token = (token_json.get("value") or {}).get("token")
if not token:
@@ -474,7 +486,9 @@ class OpenLibrary(Provider):
try:
login_json = login_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
raise RuntimeError(
f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}"
)
if login_json.get("success") is False:
if login_json.get("value") == "bad_login":
@@ -484,16 +498,23 @@ class OpenLibrary(Provider):
return session
@classmethod
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if err == "This book is not available to borrow at this time. Please try again later.":
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -574,7 +595,9 @@ class OpenLibrary(Provider):
return False, "archive-metadata-error"
@staticmethod
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text
@@ -748,7 +771,9 @@ class OpenLibrary(Provider):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
@@ -1040,7 +1065,9 @@ class OpenLibrary(Provider):
session_local = requests.Session()
try:
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1062,7 +1089,9 @@ class OpenLibrary(Provider):
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
@@ -1080,7 +1109,6 @@ class OpenLibrary(Provider):
availability_rows[i] = ("unknown", "", "", "")
done += 1
for idx, doc in enumerate(docs):
if not isinstance(doc, dict):
continue
@@ -1150,8 +1178,12 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
)
),
detail=(
@@ -1257,7 +1289,11 @@ class OpenLibrary(Provider):
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
),
@@ -1315,7 +1351,10 @@ class OpenLibrary(Provider):
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
urls = [
f"https://archive.org/borrow/{archive_id}",
f"https://archive.org/details/{archive_id}",
]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
@@ -1358,7 +1397,10 @@ class OpenLibrary(Provider):
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
log(
"[openlibrary] PDF conversion failed; keeping images folder",
file=sys.stderr,
)
return Path(temp_dir)
try:
+587 -558
View File
File diff suppressed because it is too large Load Diff
+1052 -1028
View File
File diff suppressed because it is too large Load Diff
+5 -1
View File
@@ -20,6 +20,7 @@ class YouTube(Provider):
# Use the yt_dlp Python module (installed via requirements.txt).
try:
import yt_dlp # type: ignore
ydl_opts: Dict[str, Any] = {"quiet": True, "skip_download": True, "extract_flat": True}
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
search_query = f"ytsearch{limit}:{query}"
@@ -34,7 +35,9 @@ class YouTube(Provider):
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
duration_str = (
f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
)
views_str = f"{view_count:,}" if view_count else ""
results.append(
@@ -67,6 +70,7 @@ class YouTube(Provider):
def validate(self) -> bool:
try:
import yt_dlp # type: ignore
return True
except Exception:
return False
+3 -1
View File
@@ -36,7 +36,9 @@ class ZeroXZero(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, uploaded_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
except Exception:
pass
+3 -1
View File
@@ -83,7 +83,9 @@ class Provider(ABC):
return True
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Optional hook for handling `@N` selection semantics.
The CLI can delegate selection behavior to a provider/store instead of
+3 -1
View File
@@ -68,7 +68,9 @@ def download_file(
if progress_callback is not None:
progress_callback(downloaded, total, label)
elif bar is not None:
bar.update(downloaded=downloaded, total=total, label=label, file=sys.stderr)
bar.update(
downloaded=downloaded, total=total, label=label, file=sys.stderr
)
except Exception:
pass
+3 -1
View File
@@ -96,7 +96,9 @@ def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
return availability
def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
def get_search_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[SearchProvider]:
"""Get a search-capable provider by name (compat API)."""
provider = get_provider(name, config)
+7 -4
View File
@@ -6,6 +6,7 @@ workers start, progress, or finish. Intended for CLI background workflows.
Filters to show only workers related to the current pipeline session to avoid
cluttering the terminal with workers from previous sessions.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, Optional, Set
@@ -72,18 +73,20 @@ class BackgroundNotifier:
worker_id = str(worker.get("id") or "").strip()
if not worker_id:
continue
# If filtering is enabled, skip workers not in this session
if self._filter_enabled and worker_id not in self.session_worker_ids:
continue
status = str(worker.get("status") or "running")
# Overlay mode: only emit on completion; suppress start/progress spam
if self.overlay_mode:
if status in ("completed", "finished", "error"):
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
step = str(worker.get("current_step") or worker.get("description") or "").strip()
step = str(
worker.get("current_step") or worker.get("description") or ""
).strip()
signature = f"{status}|{progress_val}|{step}"
if self._last_state.get(worker_id) == signature:
@@ -157,7 +160,7 @@ def ensure_background_notifier(
overlay_mode: bool = False,
) -> Optional[BackgroundNotifier]:
"""Attach a BackgroundNotifier to a WorkerManager if not already present.
Args:
manager: WorkerManager instance
output: Function to call for printing updates
+164 -106
View File
@@ -6,6 +6,7 @@ Lean, focused downloader without event infrastructure overhead.
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via helper.logger.log()
"""
from __future__ import annotations
import glob # noqa: F401
@@ -97,7 +98,9 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
return False
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
def list_formats(
url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
) -> Optional[List[Dict[str, Any]]]:
"""Get list of available formats for a URL using yt-dlp."""
_ensure_yt_dlp_ready()
@@ -125,19 +128,21 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
result_formats = []
for fmt in formats:
result_formats.append({
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
})
result_formats.append(
{
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
}
)
debug(f"Found {len(result_formats)} available formats")
return result_formats
@@ -147,48 +152,50 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
def _download_with_sections_via_cli(
url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
thinking sections are already downloaded. The title is extracted and stored in tags.
Returns:
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
return "", {}
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
title_from_first = None
# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
# Build unique output template for this section using session-based filename
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
output_dir_path = Path(base_outtmpl).parent
# Use session_id + section index for temp filename
# e.g., "/path/{session_id}_1.%(ext)s"
filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
filename_tmpl += ".%(ext)s"
# Use Path to handle separators correctly for the OS
section_outtmpl = str(output_dir_path / filename_tmpl)
# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
@@ -198,14 +205,14 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)
try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
title_from_first = info_dict.get("title")
if not quiet:
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
@@ -214,50 +221,54 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
except Exception as e:
if not quiet:
debug(f"Error extracting metadata: {e}")
# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"]
# Add format
if ytdl_options.get("format"):
cmd.extend(["-f", ytdl_options["format"]])
# Add ONLY this section (not all sections)
cmd.extend(["--download-sections", section])
# Add force-keyframes-at-cuts if specified
if ytdl_options.get("force_keyframes_at_cuts"):
cmd.append("--force-keyframes-at-cuts")
# Add output template for this section
cmd.extend(["-o", section_outtmpl])
# Add cookies file if present
if ytdl_options.get("cookiefile"):
# Convert backslashes to forward slashes for better compatibility
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
cmd.extend(["--cookies", cookies_path])
# Add no-playlist if specified
if ytdl_options.get("noplaylist"):
cmd.append("--no-playlist")
# Add the URL
cmd.append(url)
if not quiet:
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
debug(f"Command: {' '.join(cmd)}")
# Run the subprocess - don't capture output so progress is shown
try:
result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
raise DownloadError(
f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}"
)
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
raise DownloadError(
f"yt-dlp subprocess error for section {section_idx}: {exc}"
) from exc
return session_id, first_section_info or {}
@@ -299,33 +310,39 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
else: # video
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = [
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
"res:4320",
"res:2880",
"res:2160",
"res:1440",
"res:1080",
"res:720",
"res",
]
# Add clip sections if provided (yt-dlp will download only these sections)
if opts.clip_sections:
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
sections = []
for section_range in opts.clip_sections.split(','):
for section_range in opts.clip_sections.split(","):
try:
start_str, end_str = section_range.strip().split('-')
start_str, end_str = section_range.strip().split("-")
start_sec = float(start_str)
end_sec = float(end_str)
# Convert seconds to HH:MM:SS format
def sec_to_hhmmss(seconds):
    """Format a number of seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated, and the hour field is not capped at 24.
    """
    whole_hours = int(seconds // 3600)
    whole_minutes = int((seconds % 3600) // 60)
    whole_seconds = int(seconds % 60)
    fields = (whole_hours, whole_minutes, whole_seconds)
    return ":".join(f"{field:02d}" for field in fields)
start_time = sec_to_hhmmss(start_sec)
end_time = sec_to_hhmmss(end_sec)
sections.append(f"*{start_time}-{end_time}")
except (ValueError, AttributeError):
pass
if sections:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
@@ -412,41 +429,41 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
"""Extract the actual download link from LibGen redirect URL.
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to
the actual mirror URL. This follows the redirect chain to get the real file.
Args:
libgen_url: LibGen file.php URL
Returns:
Actual download URL or None if extraction fails
"""
try:
import requests
from urllib.parse import urlparse
# Check if this is a LibGen URL
parsed = urlparse(libgen_url)
if 'libgen' not in parsed.netloc.lower():
if "libgen" not in parsed.netloc.lower():
return None
if '/file.php' not in parsed.path.lower():
if "/file.php" not in parsed.path.lower():
return None
# LibGen redirects to actual mirrors, follow redirects to get final URL
session = requests.Session()
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
session.headers.update(
{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
)
debug(f"Following LibGen redirect chain for: {libgen_url}")
# First, get the page and look for direct download link
try:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
# Try to find actual download link in the page
try:
try:
@@ -462,8 +479,12 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
else:
@@ -477,19 +498,23 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
if not href or href.lower().startswith("javascript:"):
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
except Exception:
pass
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
return final_url
except requests.RequestException as e:
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
# Try head request as fallback
@@ -500,9 +525,9 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
return response.url
except:
pass
return None
except Exception as e:
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
return None
@@ -521,7 +546,7 @@ def _download_direct_file(
from urllib.parse import unquote, urlparse, parse_qs
import re
def _sanitize_filename(name: str) -> str:
# Windows-safe filename sanitization.
# Keep it simple: strip path parts, drop invalid chars, collapse whitespace.
@@ -563,27 +588,27 @@ def _download_direct_file(
# Extract filename from URL
parsed_url = urlparse(url)
url_path = parsed_url.path
# Try to get filename from query parameters first (for LibGen and similar services)
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
filename = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ('filename', 'download', 'file', 'name'):
for param_name in ("filename", "download", "file", "name"):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
break
# If not found in query params, extract from URL path
if not filename or not filename.strip():
filename = url_path.split("/")[-1] if url_path else ""
filename = unquote(filename)
# Remove query strings from filename if any
if "?" in filename:
filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request)
content_type = ""
try:
@@ -627,7 +652,12 @@ def _download_direct_file(
with HTTPClient(timeout=10.0) as client:
with client._request_stream("GET", url, follow_redirects=True) as resp:
resp.raise_for_status()
ct = str(resp.headers.get("content-type", "") or "").split(";", 1)[0].strip().lower()
ct = (
str(resp.headers.get("content-type", "") or "")
.split(";", 1)[0]
.strip()
.lower()
)
if ct.startswith("text/html"):
raise DownloadError("URL appears to be an HTML page, not a direct file")
except DownloadError:
@@ -635,7 +665,7 @@ def _download_direct_file(
except Exception:
# If we can't probe, keep going; later logic may still infer a safe extension.
pass
# Apply suggested filename (from provider title) if given.
suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
if suggested:
@@ -683,7 +713,9 @@ def _download_direct_file(
# Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
if not filename or not str(filename).strip():
raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")
raise DownloadError(
"Could not determine filename for URL (no Content-Disposition and no path filename)"
)
file_path = _unique_path(output_dir / filename)
@@ -719,12 +751,18 @@ def _download_direct_file(
if transfer_started[0]:
return
try:
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
except Exception:
total_val = None
try:
if hasattr(pipeline_progress, "begin_transfer"):
pipeline_progress.begin_transfer(label=str(filename or "download"), total=total_val)
pipeline_progress.begin_transfer(
label=str(filename or "download"), total=total_val
)
transfer_started[0] = True
except Exception:
return
@@ -737,7 +775,11 @@ def _download_direct_file(
try:
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
_maybe_begin_transfer(content_length)
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
pipeline_progress.update_transfer(
label=str(filename or "download"),
completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
@@ -790,14 +832,21 @@ def _download_direct_file(
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
try:
if progress_bar is not None:
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
avg_speed_str = (
progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
+ "/s"
)
else:
avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
except Exception:
@@ -835,9 +884,9 @@ def _download_direct_file(
# Only use filename as a title tag if we couldn't extract any meaningful tags
# This prevents duplicate title: tags when the filename could be mistaken for metadata
if not any(t.startswith('title:') for t in tags):
if not any(t.startswith("title:") for t in tags):
# Re-extract tags with filename as title only if needed
info['title'] = filename
info["title"] = filename
tags = []
if extract_ytdlp_tags:
try:
@@ -866,7 +915,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -884,7 +937,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -902,31 +959,33 @@ def _download_direct_file(
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
def probe_url(
url: str, no_playlist: bool = False, timeout_seconds: int = 15
) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
url: URL to probe
no_playlist: If True, ignore playlists and probe only the single video
timeout_seconds: Max seconds to wait for probe (default 15s)
Returns:
Dict with keys: extractor, title, entries (if playlist), duration, etc.
Returns None if not supported by yt-dlp or on timeout.
"""
if not is_url_supported_by_ytdlp(url):
return None
# Wrap probe in timeout to prevent hanging on large playlists
import threading
from typing import cast
result_container: List[Optional[Any]] = [None, None] # [result, error]
def _do_probe() -> None:
try:
_ensure_yt_dlp_ready()
assert yt_dlp is not None
# Extract info without downloading
# Use extract_flat='in_playlist' to get full metadata for playlist items
@@ -939,20 +998,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
"noprogress": True, # No progress bars
}
# Cookies are optional for probing; callers should pass cookiefile via DownloadOptions when needed.
# Add no_playlist option if specified
if no_playlist:
ydl_opts["noplaylist"] = True
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(url, download=False)
if not isinstance(info, dict):
result_container[0] = None
return
# Extract relevant fields
result_container[0] = {
"extractor": info.get("extractor", ""),
@@ -966,20 +1025,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
except Exception as exc:
log(f"Probe error for {url}: {exc}")
result_container[1] = exc
thread = threading.Thread(target=_do_probe, daemon=False)
thread.start()
thread.join(timeout=timeout_seconds)
if thread.is_alive():
# Probe timed out - return None to fall back to direct download
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
return None
if result_container[1] is not None:
# Probe error - return None to proceed anyway
return None
return cast(Optional[Dict[str, Any]], result_container[0])
@@ -991,4 +1050,3 @@ __all__ = [
"DownloadOptions",
"DownloadMediaResult",
]
+6 -1
View File
@@ -8,6 +8,7 @@ It is intentionally lightweight and safe to import early at process
startup so the CLI can detect and surface environment problems before
trying to import cmdlets or other modules.
"""
from __future__ import annotations
import importlib
@@ -99,7 +100,11 @@ def check_urllib3_compat() -> Tuple[bool, str]:
return False, "\n".join(lines)
# Looks good
debug("urllib3 appears usable: version=%s, exceptions=%s", getattr(urllib3, "__version__", "<unknown>"), hasattr(urllib3, "exceptions"))
debug(
"urllib3 appears usable: version=%s, exceptions=%s",
getattr(urllib3, "__version__", "<unknown>"),
hasattr(urllib3, "exceptions"),
)
return True, "OK"
+33 -31
View File
@@ -19,41 +19,43 @@ _server_port: int = 8001
class FileServerHandler(SimpleHTTPRequestHandler):
"""HTTP request handler for file serving."""
def do_GET(self):
"""Handle GET requests."""
# Parse the path
parsed_path = urllib.parse.urlparse(self.path)
file_path = urllib.parse.unquote(parsed_path.path)
# Remove leading slash
if file_path.startswith('/'):
if file_path.startswith("/"):
file_path = file_path[1:]
# Decode the file path (it's URL encoded)
try:
full_path = Path(file_path).resolve()
# Security check: ensure the path is within allowed directories
# For now, allow all paths (can be restricted later)
if full_path.is_file() and full_path.exists():
# Serve the file
logger.debug(f"Serving file: {full_path}")
# Determine content type
content_type, _ = mimetypes.guess_type(str(full_path))
if content_type is None:
content_type = 'application/octet-stream'
content_type = "application/octet-stream"
try:
with open(full_path, 'rb') as f:
with open(full_path, "rb") as f:
file_content = f.read()
self.send_response(200)
self.send_header('Content-type', content_type)
self.send_header('Content-Length', str(len(file_content)))
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
self.send_header("Content-type", content_type)
self.send_header("Content-Length", str(len(file_content)))
self.send_header(
"Content-Disposition", f'attachment; filename="{full_path.name}"'
)
self.end_headers()
self.wfile.write(file_content)
logger.info(f"Successfully served file: {full_path.name}")
@@ -66,11 +68,11 @@ class FileServerHandler(SimpleHTTPRequestHandler):
logger.warning(f"File not found: {full_path}")
self.send_error(404, "File not found")
return
except Exception as e:
logger.error(f"Error handling request: {e}")
self.send_error(400, "Bad request")
def log_message(self, format, *args):
    """Send the handler's access-log line to the module logger at debug level.

    Replaces the base handler's default of writing the line to stderr.
    """
    rendered = format % args
    logger.debug(rendered)
@@ -92,35 +94,35 @@ def get_local_ip() -> Optional[str]:
def start_file_server(port: int = 8001) -> Optional[str]:
"""Start the HTTP file server.
Args:
port: Port to serve on
Returns:
Server URL if successful, None otherwise
"""
global _file_server, _server_thread, _server_port
if _file_server is not None:
logger.debug(f"File server already running on port {_server_port}")
local_ip = get_local_ip()
if local_ip:
return f"http://{local_ip}:{_server_port}"
return None
try:
_server_port = port
# Create server
server_address = ('', port)
server_address = ("", port)
_file_server = HTTPServer(server_address, FileServerHandler)
# Start in daemon thread
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
_server_thread.start()
logger.info(f"File server started on port {port}")
# Get local IP
local_ip = get_local_ip()
if local_ip:
@@ -130,7 +132,7 @@ def start_file_server(port: int = 8001) -> Optional[str]:
else:
logger.warning("Could not determine local IP")
return None
except Exception as e:
logger.error(f"Failed to start file server: {e}")
_file_server = None
@@ -141,7 +143,7 @@ def start_file_server(port: int = 8001) -> Optional[str]:
def stop_file_server():
"""Stop the HTTP file server."""
global _file_server, _server_thread
if _file_server is not None:
try:
_file_server.shutdown()
@@ -156,25 +158,25 @@ def stop_file_server():
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
    """Build the HTTP URL under which the file server exposes *file_path*.

    Args:
        file_path: Path to the file; it must exist on disk.
        server_url: Base server URL. When None, it is derived from the local
            IP address and the module's configured server port.

    Returns:
        The URL for the file, or None if the file does not exist or the
        local IP address cannot be determined.
    """
    if not file_path.exists():
        logger.warning(f"File does not exist: {file_path}")
        return None

    base_url = server_url
    if base_url is None:
        host = get_local_ip()
        if not host:
            logger.error("Cannot determine local IP for file URL")
            return None
        base_url = f"http://{host}:{_server_port}"

    # The resolved absolute path is percent-encoded and appended after a
    # single "/" separator.
    absolute_path = str(file_path.resolve())
    quoted_path = urllib.parse.quote(absolute_path)
    return f"{base_url}/{quoted_path}"
+26 -20
View File
@@ -10,51 +10,56 @@ from rich_display import console_for
_DEBUG_ENABLED = False
_thread_local = threading.local()
def set_thread_stream(stream):
    """Register *stream* as the custom output stream for the calling thread.

    The value is stored on the module's thread-local object.
    """
    setattr(_thread_local, "stream", stream)
def get_thread_stream():
    """Return the calling thread's custom output stream, or None if unset."""
    try:
        return _thread_local.stream
    except AttributeError:
        # No stream has been registered on this thread.
        return None
def set_debug(enabled: bool) -> None:
    """Turn debug logging on or off by updating the module-level flag."""
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled
def is_debug_enabled() -> bool:
    """Return the current value of the module-level debug flag."""
    return _DEBUG_ENABLED
def debug(*args, **kwargs) -> None:
    """Emit a debug message through ``log()`` when debug logging is enabled.

    The message is prefixed with ``DEBUG:``. Output goes to the calling
    thread's custom stream when one is set; otherwise to the ``file`` keyword
    argument if given, falling back to ``sys.stderr``.
    """
    if not _DEBUG_ENABLED:
        return

    # Quiet mode: when stderr points at the null device, drop the message so
    # it is not queued in a background worker's capture.
    try:
        stderr_label = str(getattr(sys.stderr, "name", ""))
        if "nul" in stderr_label.lower() or "/dev/null" in stderr_label:
            return
    except Exception:
        pass

    # A thread-local stream takes precedence over any caller-supplied file.
    thread_stream = get_thread_stream()
    if thread_stream:
        kwargs["file"] = thread_stream
    else:
        kwargs.setdefault("file", sys.stderr)

    # Reuse log() for the actual output.
    log("DEBUG:", *args, **kwargs)
@@ -150,12 +155,13 @@ def debug_inspect(
value=value,
)
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
Automatically prepends [filename.function_name] to all output.
Defaults to stdout if not specified.
Example:
log("Upload started") # Output: [add_file.run] Upload started
"""
@@ -170,7 +176,7 @@ def log(*args, **kwargs) -> None:
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
caller_frame = frame.f_back
if caller_frame is None:
file = kwargs.pop("file", sys.stdout)
@@ -178,21 +184,21 @@ def log(*args, **kwargs) -> None:
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
try:
# Get file name without extension
file_name = Path(caller_frame.f_code.co_filename).stem
# Get function name
func_name = caller_frame.f_code.co_name
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
kwargs["file"] = stream
# Set default to stdout if not specified
elif 'file' not in kwargs:
kwargs['file'] = sys.stdout
elif "file" not in kwargs:
kwargs["file"] = sys.stdout
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")
+18 -6
View File
@@ -33,7 +33,9 @@ class PipelineProgress:
pipe_idx: int = 0
try:
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
stage_ctx = (
self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
)
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
@@ -108,7 +110,9 @@ class PipelineProgress:
except Exception:
return
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
def update_transfer(
self, *, label: str, completed: Optional[int], total: Optional[int] = None
) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
@@ -144,11 +148,15 @@ class PipelineProgress:
except Exception:
return
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
def ensure_local_ui(
self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None
) -> bool:
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
try:
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
existing = (
self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
)
except Exception:
existing = None
@@ -170,7 +178,9 @@ class PipelineProgress:
self._local_attached = False
try:
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
ui.begin_pipe(
0, total_items=max(1, int(total_items)), items_preview=list(items_preview or [])
)
except Exception:
pass
@@ -210,7 +220,9 @@ class PipelineProgress:
total_items: int,
items_preview: Optional[Sequence[Any]] = None,
) -> Iterator["PipelineProgress"]:
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
created = self.ensure_local_ui(
label=label, total_items=total_items, items_preview=items_preview
)
try:
yield self
finally:
+9 -2
View File
@@ -14,8 +14,15 @@ from models import ProgressBar
_BAR = ProgressBar()
def print_progress(
    filename: str, current: int, total: int, speed: float = 0, end: str = "\r"
) -> None:
    """Update the shared progress bar on stderr.

    Args:
        filename: Label for the bar; "progress" is used when it is empty.
        current: Amount downloaded so far.
        total: Expected total; a falsy value is passed on as None.
        speed: Accepted but not used by this function.
        end: Accepted but not used by this function.
    """
    done = int(current)
    expected = int(total) if total else None
    bar_label = str(filename or "progress")
    _BAR.update(downloaded=done, total=expected, label=bar_label, file=sys.stderr)
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
+83 -47
View File
@@ -1,4 +1,5 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
@@ -11,16 +12,18 @@ from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == 'nt':
if os.name == "nt":
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, 'r+b', buffering=0)
return open(path, "r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -38,7 +41,7 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
while True:
try:
sock.connect(path)
return sock.makefile('r+b', buffering=0)
return sock.makefile("r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -49,74 +52,92 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
    """Create a helper function for sending script messages via IPC.

    Args:
        ipc: Writable binary stream connected to the mpv IPC endpoint, or
            None when no connection is available.

    Returns:
        A callable ``send(event, payload)``. With no connection it is a no-op.
        Otherwise it writes one newline-terminated JSON ``script-message``
        command per call, with the payload itself JSON-encoded as the final
        command argument. Delivery is best effort: write failures are ignored.
    """
    if ipc is None:

        def _noop(_event: str, _payload: dict) -> None:
            return None

        return _noop

    # Serialize writers so concurrent calls cannot interleave on the stream.
    lock = threading.Lock()

    def _send(event: str, payload: dict) -> None:
        message = json.dumps(
            {"command": ["script-message", event, json.dumps(payload)]}, ensure_ascii=False
        )
        encoded = message.encode("utf-8") + b"\n"
        with lock:
            try:
                ipc.write(encoded)
                ipc.flush()
            except (OSError, ValueError):
                # OSError: the pipe/socket failed or the peer went away.
                # ValueError: the stream object has already been closed.
                # Either way the notification is dropped rather than raised.
                pass

    return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
    """Lazily yield each item of *stream* with trailing CR/LF characters removed."""
    yield from (line.rstrip("\r\n") for line in stream)
def _run_task(args, parser) -> int:
if not args.command:
parser.error('run-task requires a command to execute (use "--" before the command).')
env = os.environ.copy()
for entry in args.env:
key, sep, value = entry.partition('=')
key, sep, value = entry.partition("=")
if not sep:
parser.error(f'Invalid environment variable definition: {entry!r}')
parser.error(f"Invalid environment variable definition: {entry!r}")
env[key] = value
command = list(args.command)
if command and command[0] == '--':
if command and command[0] == "--":
command.pop(0)
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
if not command:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': 'No command provided after separator',
})
log('[downlow.py] No command provided for run-task', file=sys.stderr)
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": "No command provided after separator",
},
)
log("[downlow.py] No command provided for run-task", file=sys.stderr)
return 1
if command and isinstance(command[0], str) and sys.executable:
first = command[0].lower()
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
if first in {"python", "python3", "py", "python.exe", "python3.exe", "py.exe"}:
command[0] = sys.executable
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"Launching command: {command}", file=sys.stderr)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'start',
'command': command,
'cwd': args.cwd or os.getcwd(),
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "start",
"command": command,
"cwd": args.cwd or os.getcwd(),
},
)
popen_kwargs = {}
if os.name == 'nt':
if os.name == "nt":
# Avoid flashing a console window when spawning console-subsystem executables.
flags = 0
try:
flags |= int(getattr(subprocess, 'CREATE_NO_WINDOW', 0x08000000))
flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000))
except Exception:
flags |= 0x08000000
popen_kwargs['creationflags'] = flags
popen_kwargs["creationflags"] = flags
try:
si = subprocess.STARTUPINFO()
si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
si.wShowWindow = subprocess.SW_HIDE
popen_kwargs['startupinfo'] = si
popen_kwargs["startupinfo"] = si
except Exception:
pass
try:
@@ -132,44 +153,59 @@ def _run_task(args, parser) -> int:
**popen_kwargs,
)
except FileNotFoundError as exc:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': f'Executable not found: {exc.filename}',
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": f"Executable not found: {exc.filename}",
},
)
log(f"{exc}", file=sys.stderr)
return 1
stdout_lines: list[str] = []
stderr_lines: list[str] = []
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
for line in iter_stream(stream):
sink.append(line)
notifier('downlow-task-event', {
'id': args.task_id,
'event': label,
'line': line,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": label,
"line": line,
},
)
threads = []
if process.stdout:
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
t_out = threading.Thread(
target=pump, args=(process.stdout, "stdout", stdout_lines), daemon=True
)
t_out.start()
threads.append(t_out)
if process.stderr:
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
t_err = threading.Thread(
target=pump, args=(process.stderr, "stderr", stderr_lines), daemon=True
)
t_err.start()
threads.append(t_err)
return_code = process.wait()
for t in threads:
t.join(timeout=0.1)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'exit',
'returncode': return_code,
'success': return_code == 0,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "exit",
"returncode": return_code,
"success": return_code == 0,
},
)
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
if stdout_lines:
log('\n'.join(stdout_lines))
log("\n".join(stdout_lines))
if stderr_lines:
log('\n'.join(stderr_lines), file=sys.stderr)
log("\n".join(stderr_lines), file=sys.stderr)
return return_code
+102 -73
View File
@@ -1,10 +1,12 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations
import json
import hashlib
import subprocess
import shutil
try:
import ffmpeg # type: ignore
except Exception:
@@ -28,12 +30,16 @@ except ImportError:
CHUNK_SIZE = 1024 * 1024 # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
    """Create *path* as a directory, including missing parents, if needed.

    An already existing directory is accepted silently.

    Raises:
        RuntimeError: If the directory cannot be created; the underlying
            OSError is chained as the cause.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError as err:  # pragma: no cover - surfaced to caller
        message = f"Failed to create directory {path}: {err}"
        raise RuntimeError(message) from err
def unique_path(path: Path) -> Path:
"""Return a unique path by appending " (n)" if needed."""
if not path.exists():
@@ -48,15 +54,18 @@ def unique_path(path: Path) -> Path:
return candidate
counter += 1
def sanitize_metadata_value(value: Any) -> str | None:
    """Normalize a metadata value to a clean single-line string.

    Non-string values are converted with ``str()``. NUL, carriage-return and
    newline characters are each replaced by a space, then surrounding
    whitespace is stripped.

    Returns:
        The cleaned string, or None when *value* is None or nothing remains
        after cleaning.
    """
    if value is None:
        return None
    text = value if isinstance(value, str) else str(value)
    # Map NUL (0), LF (10) and CR (13) to a space in a single pass.
    cleaned = text.translate({0: " ", 10: " ", 13: " "}).strip()
    return cleaned or None
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
@@ -65,21 +74,23 @@ def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen.add(value)
ordered.append(value)
return ordered
def sha256_file(file_path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*.

    The file is read in CHUNK_SIZE pieces so it is never loaded whole.
    """
    digest = hashlib.sha256()
    with file_path.open("rb") as handle:
        while True:
            block = handle.read(CHUNK_SIZE)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
"""Create a .metadata sidecar file with JSON metadata.
The metadata dict should contain title. If not present, it will be derived from
the filename. This ensures the .metadata file can be matched during batch import.
Args:
file_path: Path to the exported file
metadata: Dictionary of metadata to save
@@ -90,50 +101,50 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
file_ext = file_path.suffix.lower()
# Ensure metadata has a title field that matches the filename (without extension)
# This allows the sidecar to be matched and imported properly during batch import
if 'title' not in metadata or not metadata.get('title'):
metadata['title'] = file_name
metadata['hash'] = sha256_file(file_path)
metadata['size'] = Path(file_path).stat().st_size
if "title" not in metadata or not metadata.get("title"):
metadata["title"] = file_name
metadata["hash"] = sha256_file(file_path)
metadata["size"] = Path(file_path).stat().st_size
format_found = False
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
for key, info in ext_map.items():
if info.get("ext") == file_ext:
metadata['type'] = mime_type
metadata["type"] = mime_type
format_found = True
break
if format_found:
break
else:
metadata['type'] = 'unknown'
metadata["type"] = "unknown"
metadata.update(ffprobe(str(file_path)))
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
metadata_path = file_path.with_suffix(file_path.suffix + ".metadata")
try:
with open(metadata_path, 'w', encoding='utf-8') as f:
with open(metadata_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
except OSError as exc:
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
def create_tags_sidecar(file_path: Path, tags: set) -> None:
    """Create a .tag sidecar file with tags (one per line).

    Every tag is converted to ``str``, stripped and lower-cased *before*
    de-duplication and sorting, so the sidecar is written in sorted order and
    never contains duplicate or blank lines. Nothing is written when ``tags``
    is empty or when no non-empty tag remains after normalization.

    Args:
        file_path: Path to the exported file. The sidecar is written next to
            it, with ``.tag`` appended to the existing suffix.
        tags: Set of tag strings

    Raises:
        RuntimeError: If the sidecar file cannot be written.
    """
    if not tags:
        return

    # Normalize first: sorting the raw values and normalizing afterwards can
    # produce out-of-order lines ("B" sorts before "a") and duplicates
    # ("A" and "a" both become "a").
    normalized = sorted({str(tag).strip().lower() for tag in tags} - {""})
    if not normalized:
        return

    tags_path = file_path.with_suffix(file_path.suffix + ".tag")
    try:
        with open(tags_path, "w", encoding="utf-8") as f:
            f.writelines(f"{tag}\n" for tag in normalized)
    except (OSError, UnicodeError) as e:
        # Only I/O and encoding failures can occur here now that the tags are
        # plain strings; surface them as RuntimeError, as before.
        raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
def ffprobe(file_path: str) -> dict:
"""Probe a media file and return a metadata dictionary.
@@ -157,7 +168,16 @@ def ffprobe(file_path: str) -> dict:
if ffprobe_cmd:
try:
proc = subprocess.run(
[ffprobe_cmd, "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(file_path)],
[
ffprobe_cmd,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
str(file_path),
],
check=True,
capture_output=True,
text=True,
@@ -185,7 +205,9 @@ def ffprobe(file_path: str) -> dict:
if codec_type == "audio":
metadata["audio_codec"] = stream.get("codec_name")
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
metadata["samplerate"] = (
int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
)
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
elif codec_type == "video":
metadata["video_codec"] = stream.get("codec_name")
@@ -233,16 +255,16 @@ def jsonify(value: Any) -> Any:
def format_bytes(bytes_value) -> str:
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
Args:
bytes_value: Size in bytes (int or float)
Returns:
Formatted string like '1.5 MB' or '756 MB'
"""
if bytes_value is None or bytes_value <= 0:
return "0 B"
if isinstance(bytes_value, (int, float)):
for unit in ("B", "KB", "MB", "GB", "TB"):
if bytes_value < 1024:
@@ -256,33 +278,33 @@ def format_bytes(bytes_value) -> str:
def format_duration(seconds) -> str:
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string like '1:23:45' or '5:30'
"""
if seconds is None or seconds == '':
if seconds is None or seconds == "":
return "N/A"
if isinstance(seconds, str):
try:
seconds = float(seconds)
except ValueError:
return str(seconds)
if not isinstance(seconds, (int, float)):
return str(seconds)
total_seconds = int(seconds)
if total_seconds < 0:
return "N/A"
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
elif minutes > 0:
@@ -293,53 +315,60 @@ def format_duration(seconds) -> str:
def format_timestamp(timestamp_str) -> str:
    """Format an ISO timestamp for display.

    Args:
        timestamp_str: ISO format timestamp string, a ``datetime`` instance,
            or None

    Returns:
        Formatted string like "2025-10-28 19:36:01", "N/A" for empty input, or
        ``str(timestamp_str)`` when the value cannot be parsed.
    """
    if not timestamp_str:
        return "N/A"

    # Already a datetime: format it directly instead of failing to parse it.
    if isinstance(timestamp_str, datetime):
        return timestamp_str.strftime("%Y-%m-%d %H:%M:%S")

    # Any other non-string value has nothing to parse; show it as-is.
    if not isinstance(timestamp_str, str):
        return str(timestamp_str)

    text = timestamp_str.strip()
    # datetime.fromisoformat() only accepts a trailing "Z" on Python 3.11+,
    # so spell the UTC offset out explicitly for every input shape.
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"

    try:
        return datetime.fromisoformat(text).strftime("%Y-%m-%d %H:%M:%S")
    except (ValueError, OverflowError) as e:
        _format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
        return str(timestamp_str)
def format_metadata_value(key: str, value) -> str:
    """Render a metadata value as display text, choosing the formatter by key.

    This is the single place where per-field display rules are decided.

    Args:
        key: Metadata field name
        value: Value to format

    Returns:
        "N/A" for ``None`` or an empty string; otherwise the output of the
        size, duration or timestamp formatter selected by ``key``, or
        ``str(value)`` for any other key.
    """
    if value is None or value == "":
        return "N/A"

    timestamp_fields = (
        "time_modified",
        "time_imported",
        "created_at",
        "updated_at",
        "indexed_at",
        "timestamp",
    )

    # Pick the field-specific formatter; unknown keys fall back to str().
    if key in ("size", "file_size"):
        render = format_bytes
    elif key in ("duration", "length"):
        render = format_duration
    elif key in timestamp_fields:
        render = format_timestamp
    else:
        render = str
    return render(value)
@@ -353,50 +382,50 @@ def format_metadata_value(key: str, value) -> str:
def extract_link_from_args(args: Iterable[str]) -> Any | None:
"""Extract HTTP/HTTPS URL from command arguments.
Args:
args: Command arguments
Returns:
URL string if found, None otherwise
"""
args_list = list(args) if not isinstance(args, (list, tuple)) else args
if not args_list or len(args_list) == 0:
return None
potential_link = str(args_list[0])
if potential_link.startswith(('http://', 'https://')):
if potential_link.startswith(("http://", "https://")):
return potential_link
return None
def extract_link_from_result(result: Any) -> Any | None:
"""Extract URL from a result object (dict or object with attributes).
Args:
result: Result object from pipeline (dict or object)
Returns:
URL string if found, None otherwise
"""
if isinstance(result, dict):
return result.get('url') or result.get('link') or result.get('href')
return result.get("url") or result.get("link") or result.get("href")
return (
getattr(result, 'url', None) or
getattr(result, 'link', None) or
getattr(result, 'href', None)
getattr(result, "url", None)
or getattr(result, "link", None)
or getattr(result, "href", None)
)
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
"""Extract link from args or result (args take priority).
Args:
result: Pipeline result object
args: Command arguments
Returns:
URL string if found, None otherwise
"""
@@ -404,34 +433,34 @@ def extract_link(result: Any, args: Iterable[str]) -> Any | None:
link = extract_link_from_args(args)
if link:
return link
# Fall back to result
return extract_link_from_result(result)
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
"""Get API key from config with fallback support.
Args:
config: Configuration dictionary
service: Service name for logging
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
Returns:
API key if found and not empty, None otherwise
"""
try:
parts = key_path.split('.')
parts = key_path.split(".")
value = config
for part in parts:
if isinstance(value, dict):
value = value.get(part)
else:
return None
if isinstance(value, str):
return value.strip() or None
return None
except Exception:
return None
@@ -439,18 +468,18 @@ def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | No
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
    """Record the direct and original links on a result, in place.

    Dictionaries receive the values as the keys ``direct_link`` and
    ``original_link``; any other object receives them as attributes of the
    same names.

    Args:
        result: Result object to modify (dict or object)
        direct_link: The unlocked/direct URL
        original_link: The original restricted URL
    """
    fields = (("direct_link", direct_link), ("original_link", original_link))
    if isinstance(result, dict):
        for name, link in fields:
            result[name] = link
        return
    for name, link in fields:
        setattr(result, name, link)
# ============================================================================
@@ -529,4 +558,4 @@ def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
for tag in rule["extra_tags"]:
if tag not in resolved.extra_tags:
resolved.extra_tags.append(tag)
return resolved
return resolved
+98 -81
View File
@@ -1,82 +1,99 @@
mime_maps = {
"image": {
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
"png": { "ext": ".png", "mimes": ["image/png"] },
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
},
"image_sequence": {
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
},
"video": {
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
},
"audio": {
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
"opus": { "ext": ".opus", "mimes": ["audio/opus"] },
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
},
"document": {
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
},
"archive": {
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
},
"project": {
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
},
"other": {
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
"json": { "ext": ".json", "mimes": ["application/json"] },
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
}
"image": {
"jpg": {"ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"]},
"png": {"ext": ".png", "mimes": ["image/png"]},
"gif": {"ext": ".gif", "mimes": ["image/gif"]},
"webp": {"ext": ".webp", "mimes": ["image/webp"]},
"avif": {"ext": ".avif", "mimes": ["image/avif"]},
"jxl": {"ext": ".jxl", "mimes": ["image/jxl"]},
"bmp": {"ext": ".bmp", "mimes": ["image/bmp"]},
"heic": {"ext": ".heic", "mimes": ["image/heic"]},
"heif": {"ext": ".heif", "mimes": ["image/heif"]},
"ico": {"ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"]},
"qoi": {"ext": ".qoi", "mimes": ["image/qoi"]},
"tiff": {"ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"]},
"svg": {"ext": ".svg", "mimes": ["image/svg+xml"]},
},
"image_sequence": {
"apng": {"ext": ".apng", "mimes": ["image/apng"], "sequence": True},
"avifs": {"ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True},
"heics": {"ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True},
"heifs": {"ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True},
},
"video": {
"mp4": {"ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"]},
"webm": {"ext": ".webm", "mimes": ["video/webm", "audio/webm"]},
"mov": {"ext": ".mov", "mimes": ["video/quicktime"]},
"ogv": {"ext": ".ogv", "mimes": ["video/ogg"]},
"mpeg": {"ext": ".mpeg", "mimes": ["video/mpeg"]},
"avi": {"ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"]},
"flv": {"ext": ".flv", "mimes": ["video/x-flv"]},
"mkv": {
"ext": ".mkv",
"mimes": ["video/x-matroska", "application/x-matroska"],
"audio_only_ext": ".mka",
},
"wmv": {"ext": ".wmv", "mimes": ["video/x-ms-wmv"]},
"rv": {"ext": ".rv", "mimes": ["video/vnd.rn-realvideo"]},
},
"audio": {
"mp3": {"ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"]},
"m4a": {"ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"]},
"ogg": {"ext": ".ogg", "mimes": ["audio/ogg"]},
"opus": {"ext": ".opus", "mimes": ["audio/opus"]},
"flac": {"ext": ".flac", "mimes": ["audio/flac"]},
"wav": {"ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"]},
"wma": {"ext": ".wma", "mimes": ["audio/x-ms-wma"]},
"tta": {"ext": ".tta", "mimes": ["audio/x-tta"]},
"wv": {"ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"]},
"mka": {"ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"]},
},
"document": {
"pdf": {"ext": ".pdf", "mimes": ["application/pdf"]},
"epub": {"ext": ".epub", "mimes": ["application/epub+zip"]},
"djvu": {"ext": ".djvu", "mimes": ["application/vnd.djvu"]},
"rtf": {"ext": ".rtf", "mimes": ["application/rtf"]},
"docx": {
"ext": ".docx",
"mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
},
"xlsx": {
"ext": ".xlsx",
"mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
},
"pptx": {
"ext": ".pptx",
"mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
},
"doc": {"ext": ".doc", "mimes": ["application/msword"]},
"xls": {"ext": ".xls", "mimes": ["application/vnd.ms-excel"]},
"ppt": {"ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"]},
},
"archive": {
"zip": {"ext": ".zip", "mimes": ["application/zip"]},
"7z": {"ext": ".7z", "mimes": ["application/x-7z-compressed"]},
"rar": {"ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"]},
"gz": {"ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"]},
"tar": {"ext": ".tar", "mimes": ["application/x-tar"]},
"cbz": {
"ext": ".cbz",
"mimes": ["application/zip"],
"note": "zip archive of images; prefer extension-based detection for comics",
},
},
"project": {
"clip": {"ext": ".clip", "mimes": ["application/clip"]},
"kra": {"ext": ".kra", "mimes": ["application/x-krita"]},
"procreate": {"ext": ".procreate", "mimes": ["application/x-procreate"]},
"psd": {"ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"]},
"swf": {"ext": ".swf", "mimes": ["application/x-shockwave-flash"]},
},
"other": {
"octet-stream": {"ext": "", "mimes": ["application/octet-stream"]},
"json": {"ext": ".json", "mimes": ["application/json"]},
"xml": {"ext": ".xml", "mimes": ["application/xml", "text/xml"]},
"csv": {"ext": ".csv", "mimes": ["text/csv"]},
},
}
@@ -90,15 +107,15 @@ def get_type_from_ext(ext: str) -> str:
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
"""
if not ext:
return 'other'
return "other"
ext_clean = ext.lstrip('.').lower()
ext_clean = ext.lstrip(".").lower()
for type_name, extensions_dict in mime_maps.items():
if ext_clean in extensions_dict:
return type_name
return 'other'
return "other"
# Canonical supported extension set for all stores/cmdlets.
+172 -135
View File
@@ -19,11 +19,17 @@ logger = logging.getLogger(__name__)
class Worker:
"""Represents a single worker task with state management."""
def __init__(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", manager: Optional['WorkerManager'] = None):
def __init__(
self,
worker_id: str,
worker_type: str,
title: str = "",
description: str = "",
manager: Optional["WorkerManager"] = None,
):
"""Initialize a worker.
Args:
worker_id: Unique identifier for this worker
worker_type: Type of work (e.g., 'download', 'search', 'import')
@@ -43,10 +49,10 @@ class Worker:
self.result = "pending"
self._stdout_buffer = []
self._steps_buffer = []
def log_step(self, step_text: str) -> None:
"""Log a step for this worker.
Args:
step_text: Text describing the step
"""
@@ -57,10 +63,10 @@ class Worker:
logger.info(f"[{self.id}] {step_text}")
except Exception as e:
logger.error(f"Error logging step for worker {self.id}: {e}")
def append_stdout(self, text: str) -> None:
"""Append text to stdout log.
Args:
text: Text to append
"""
@@ -71,10 +77,10 @@ class Worker:
self._stdout_buffer.append(text)
except Exception as e:
logger.error(f"Error appending stdout for worker {self.id}: {e}")
def get_stdout(self) -> str:
"""Get all stdout for this worker.
Returns:
Complete stdout text
"""
@@ -86,10 +92,10 @@ class Worker:
except Exception as e:
logger.error(f"Error getting stdout for worker {self.id}: {e}")
return ""
def get_steps(self) -> str:
"""Get all steps for this worker.
Returns:
Complete steps text
"""
@@ -101,10 +107,10 @@ class Worker:
except Exception as e:
logger.error(f"Error getting steps for worker {self.id}: {e}")
return ""
def update_progress(self, progress: str = "", details: str = "") -> None:
"""Update worker progress.
Args:
progress: Progress string (e.g., "50%")
details: Additional details
@@ -116,10 +122,10 @@ class Worker:
self.manager.update_worker(self.id, progress, details)
except Exception as e:
logger.error(f"Error updating worker {self.id}: {e}")
def finish(self, result: str = "completed", message: str = "") -> None:
"""Mark worker as finished.
Args:
result: Result status ('completed', 'error', 'cancelled')
message: Result message/error details
@@ -139,12 +145,16 @@ class Worker:
class WorkerLoggingHandler(logging.StreamHandler):
"""Custom logging handler that captures logs for a worker."""
def __init__(self, worker_id: str, db: API_folder_store,
manager: Optional['WorkerManager'] = None,
buffer_size: int = 50):
def __init__(
self,
worker_id: str,
db: API_folder_store,
manager: Optional["WorkerManager"] = None,
buffer_size: int = 50,
):
"""Initialize the handler.
Args:
worker_id: ID of the worker to capture logs for
db: Reference to LocalLibraryDB for storing logs
@@ -157,14 +167,13 @@ class WorkerLoggingHandler(logging.StreamHandler):
self.buffer_size = buffer_size
self.buffer = []
self._lock = Lock()
# Set a format that includes timestamp and level
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
"%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
self.setFormatter(formatter)
def emit(self, record):
"""Emit a log record."""
try:
@@ -183,41 +192,42 @@ class WorkerLoggingHandler(logging.StreamHandler):
except (TypeError, ValueError):
# If that fails too, just use the raw message string
msg = str(record.msg)
# Add timestamp and level if not already in message
import time
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
with self._lock:
self.buffer.append(msg)
# Flush to DB when buffer reaches size
if len(self.buffer) >= self.buffer_size:
self._flush()
except Exception:
self.handleError(record)
def _flush(self):
"""Flush buffered logs to database."""
if self.buffer:
log_text = '\n'.join(self.buffer)
log_text = "\n".join(self.buffer)
try:
if self.manager:
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
self.manager.append_worker_stdout(self.worker_id, log_text, channel="log")
else:
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
self.db.append_worker_stdout(self.worker_id, log_text, channel="log")
except Exception as e:
# If we can't write to DB, at least log it
log(f"Error flushing worker logs: {e}")
self.buffer = []
def flush(self):
"""Flush any buffered records."""
with self._lock:
self._flush()
super().flush()
def close(self):
"""Close the handler."""
self.flush()
@@ -226,10 +236,10 @@ class WorkerLoggingHandler(logging.StreamHandler):
class WorkerManager:
"""Manages persistent worker tasks with auto-refresh capability."""
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
"""Initialize the worker manager.
Args:
library_root: Root directory for the local library database
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
@@ -243,7 +253,7 @@ class WorkerManager:
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str, str] = {}
def close(self) -> None:
"""Close the database connection."""
if self.db:
@@ -251,18 +261,18 @@ class WorkerManager:
self.db.close()
except Exception:
pass
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - close database."""
self.close()
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
"""Register a callback to be called on worker updates.
Args:
callback: Function that receives list of active workers
"""
@@ -297,25 +307,25 @@ class WorkerManager:
except Exception as exc:
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
return 0
def remove_refresh_callback(self, callback: Callable) -> None:
"""Remove a refresh callback.
Args:
callback: The callback function to remove
"""
with self._lock:
if callback in self.refresh_callbacks:
self.refresh_callbacks.remove(callback)
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
"""Enable logging capture for a worker.
Creates a logging handler that captures all logs for this worker.
Args:
worker_id: ID of the worker to capture logs for
Returns:
The logging handler that was created, or None if there was an error
"""
@@ -323,46 +333,57 @@ class WorkerManager:
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
with self._lock:
self.worker_handlers[worker_id] = handler
# Add the handler to the root logger so it captures all logs
root_logger = logging.getLogger()
root_logger.addHandler(handler)
root_logger.setLevel(logging.DEBUG) # Capture all levels
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
return handler
except Exception as e:
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True
)
return None
def disable_logging_for_worker(self, worker_id: str) -> None:
    """Disable logging capture for a worker and flush any pending logs.

    The handler is detached from the root logger *before* it is flushed and
    closed. Closing it while still attached would let a record emitted by
    another thread land in the buffer after the final flush and be lost.

    Args:
        worker_id: ID of the worker to stop capturing logs for
    """
    try:
        with self._lock:
            handler = self.worker_handlers.pop(worker_id, None)

        if handler:
            # Stop new records from reaching the handler first...
            root_logger = logging.getLogger()
            root_logger.removeHandler(handler)

            # ...then persist what was buffered and release the handler.
            handler.flush()
            handler.close()

        logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
    except Exception as e:
        logger.error(
            f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}",
            exc_info=True,
        )
def track_worker(
self,
worker_id: str,
worker_type: str,
title: str = "",
description: str = "",
total_steps: int = 0,
pipe: Optional[str] = None,
) -> bool:
"""Start tracking a new worker.
Args:
worker_id: Unique identifier for the worker
worker_type: Type of worker (e.g., 'download', 'search', 'import')
@@ -370,12 +391,14 @@ class WorkerManager:
description: Worker description
total_steps: Total number of steps for progress tracking
pipe: Text of the originating pipe/prompt, if any
Returns:
True if worker was inserted successfully
"""
try:
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
result = self.db.insert_worker(
worker_id, worker_type, title, description, total_steps, pipe=pipe
)
if result > 0:
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
self._start_refresh_if_needed()
@@ -384,65 +407,69 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
return False
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
details: str = "", error: str = "") -> bool:
def update_worker(
self,
worker_id: str,
progress: float = 0.0,
current_step: str = "",
details: str = "",
error: str = "",
) -> bool:
"""Update worker progress and status.
Args:
worker_id: Unique identifier for the worker
progress: Progress percentage (0-100)
current_step: Current step description
details: Additional details
error: Error message if any
Returns:
True if update was successful
"""
try:
kwargs = {}
if progress > 0:
kwargs['progress'] = progress
kwargs["progress"] = progress
if current_step:
kwargs['current_step'] = current_step
kwargs["current_step"] = current_step
if details:
kwargs['description'] = details
kwargs["description"] = details
if error:
kwargs['error_message'] = error
kwargs["error_message"] = error
if kwargs:
kwargs['last_updated'] = datetime.now().isoformat()
if 'current_step' in kwargs and kwargs['current_step']:
self._worker_last_step[worker_id] = str(kwargs['current_step'])
kwargs["last_updated"] = datetime.now().isoformat()
if "current_step" in kwargs and kwargs["current_step"]:
self._worker_last_step[worker_id] = str(kwargs["current_step"])
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
return False
def finish_worker(self, worker_id: str, result: str = "completed",
error_msg: str = "", result_data: str = "") -> bool:
def finish_worker(
self, worker_id: str, result: str = "completed", error_msg: str = "", result_data: str = ""
) -> bool:
"""Mark a worker as finished.
Args:
worker_id: Unique identifier for the worker
result: Result status ('completed', 'error', 'cancelled')
error_msg: Error message if any
result_data: Result data as JSON string
Returns:
True if update was successful
"""
try:
kwargs = {
'status': result,
'completed_at': datetime.now().isoformat()
}
kwargs = {"status": result, "completed_at": datetime.now().isoformat()}
if error_msg:
kwargs['error_message'] = error_msg
kwargs["error_message"] = error_msg
if result_data:
kwargs['result_data'] = result_data
kwargs["result_data"] = result_data
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
self._worker_last_step.pop(worker_id, None)
@@ -450,10 +477,10 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
return False
def get_active_workers(self) -> List[Dict[str, Any]]:
"""Get all active (running) workers.
Returns:
List of active worker dictionaries
"""
@@ -462,31 +489,33 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
return []
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
"""Get all finished workers (completed, errored, or cancelled).
Args:
limit: Maximum number of workers to retrieve
Returns:
List of finished worker dictionaries
"""
try:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
finished = [
w for w in all_workers if w.get("status") in ["completed", "error", "cancelled"]
]
return finished
except Exception as e:
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
return []
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
"""Get a specific worker's data.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker data or None if not found
"""
@@ -495,18 +524,18 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
return None
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
"""Fetch recorded worker timeline events."""
return self.db.get_worker_events(worker_id, limit)
def log_step(self, worker_id: str, step_text: str) -> bool:
"""Log a step to a worker's step history.
Args:
worker_id: Unique identifier for the worker
step_text: Step description to log
Returns:
True if successful
"""
@@ -516,43 +545,49 @@ class WorkerManager:
self._worker_last_step[worker_id] = step_text
return success
except Exception as e:
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True
)
return False
def _get_last_step(self, worker_id: str) -> Optional[str]:
"""Return the most recent step description for a worker."""
return self._worker_last_step.get(worker_id)
def get_steps(self, worker_id: str) -> str:
"""Get step logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Steps text or empty string if not found
"""
try:
return self.db.get_worker_steps(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
return ''
logger.error(
f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True
)
return ""
def start_auto_refresh(self) -> None:
    """Launch the background auto-refresh thread unless disabled or already running.

    Does nothing when the refresh interval is zero or negative, or when a
    refresh thread is still alive. Otherwise the stop flag is cleared and a
    new daemon thread running the refresh loop is started.
    """
    interval = self.auto_refresh_interval
    if interval <= 0:
        logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
        return

    current = self.refresh_thread
    if current and current.is_alive():
        logger.debug("[WorkerManager] Auto-refresh already running")
        return

    logger.info(
        f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval"
    )
    self._stop_refresh = False
    worker_thread = Thread(target=self._auto_refresh_loop, daemon=True)
    self.refresh_thread = worker_thread
    worker_thread.start()
def stop_auto_refresh(self) -> None:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
@@ -560,46 +595,48 @@ class WorkerManager:
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
def _start_refresh_if_needed(self) -> None:
"""Start auto-refresh if we have active workers and callbacks."""
active = self.get_active_workers()
if active and self.refresh_callbacks and not self._stop_refresh:
self.start_auto_refresh()
def _auto_refresh_loop(self) -> None:
    """Main auto-refresh loop that periodically queries and notifies.

    Each iteration sleeps for ``auto_refresh_interval`` seconds, fetches the
    active workers, and passes them to every registered callback. The loop
    ends when ``_stop_refresh`` is set or no active workers remain.
    """
    try:
        while not self._stop_refresh:
            time.sleep(self.auto_refresh_interval)
            # Check if there are active workers
            active = self.get_active_workers()
            if not active:
                # No more active workers, stop refreshing
                logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
                break
            # Call all registered callbacks with the active workers
            with self._lock:
                for callback in self.refresh_callbacks:
                    try:
                        callback(active)
                    except Exception as e:
                        # One failing callback must not stop the others;
                        # log it once and continue with the next callback.
                        logger.error(
                            f"[WorkerManager] Error in refresh callback: {e}", exc_info=True
                        )
    except Exception as e:
        logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
    finally:
        logger.debug("[WorkerManager] Auto-refresh loop ended")
def cleanup_old_workers(self, days: int = 7) -> int:
"""Clean up completed/errored workers older than specified days.
Args:
days: Delete workers completed more than this many days ago
Returns:
Number of workers deleted
"""
@@ -611,15 +648,15 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
return 0
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Append text to a worker's stdout log.
Args:
worker_id: Unique identifier for the worker
text: Text to append
channel: Logical channel (stdout, stderr, log, etc.)
Returns:
True if append was successful
"""
@@ -629,13 +666,13 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
return False
def get_stdout(self, worker_id: str) -> str:
"""Get stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker's stdout or empty string
"""
@@ -644,17 +681,17 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
return ""
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
    """Alias kept for older callers; forwards unchanged to ``append_stdout``."""
    appended = self.append_stdout(worker_id, text, channel=channel)
    return appended
def clear_stdout(self, worker_id: str) -> bool:
"""Clear stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
True if clear was successful
"""
@@ -663,7 +700,7 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
return False
def close(self) -> None:
"""Close the worker manager and database connection."""
self.stop_auto_refresh()
+324 -190
View File
File diff suppressed because it is too large Load Diff
+234 -186
View File
@@ -18,7 +18,7 @@ _HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {}
class HydrusNetwork(Store):
"""File storage backend for Hydrus client.
Each instance represents a specific Hydrus client connection.
Maintains its own HydrusClient.
"""
@@ -41,7 +41,7 @@ class HydrusNetwork(Store):
return instance
setattr(__new__, "keys", ("NAME", "API", "URL"))
def __init__(
self,
instance_name: Optional[str] = None,
@@ -53,7 +53,7 @@ class HydrusNetwork(Store):
URL: Optional[str] = None,
) -> None:
"""Initialize Hydrus storage backend.
Args:
instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
api_key: Hydrus Client API access key
@@ -70,7 +70,7 @@ class HydrusNetwork(Store):
if not instance_name or not api_key or not url:
raise ValueError("HydrusNetwork requires NAME, API, and URL")
self.NAME = instance_name
self.API = api_key
self.URL = url.rstrip("/")
@@ -104,7 +104,9 @@ class HydrusNetwork(Store):
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError("Hydrus /verify_access_key returned an unexpected response")
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
)
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
except Exception as exc:
@@ -196,16 +198,16 @@ class HydrusNetwork(Store):
def add_file(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Hydrus with full metadata support.
Args:
file_path: Path to the file to upload
tag: Optional list of tag values to add
url: Optional list of url to associate with the file
title: Optional title (will be added as 'title:value' tag)
Returns:
File hash from Hydrus
Raises:
Exception: If upload fails
"""
@@ -214,7 +216,7 @@ class HydrusNetwork(Store):
tag_list = kwargs.get("tag", [])
url = kwargs.get("url", [])
title = kwargs.get("title")
# Add title to tags if provided and not already present
if title:
title_tag = f"title:{title}".strip().lower()
@@ -222,7 +224,11 @@ class HydrusNetwork(Store):
tag_list = [title_tag] + list(tag_list)
# Hydrus is lowercase-only tags; normalize here for consistency.
tag_list = [str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip()]
tag_list = [
str(t).strip().lower()
for t in (tag_list or [])
if isinstance(t, str) and str(t).strip()
]
try:
# Compute file hash
@@ -307,14 +313,19 @@ class HydrusNetwork(Store):
# Associate url if provided (both for new and existing files)
if url:
log(f"{self._log_prefix()} Associating {len(url)} URL(s) with file", file=sys.stderr)
log(
f"{self._log_prefix()} Associating {len(url)} URL(s) with file", file=sys.stderr
)
for url in url:
if url:
try:
client.associate_url(file_hash, str(url))
debug(f"{self._log_prefix()} Associated URL: {url}")
except Exception as exc:
log(f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr)
log(
f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}",
file=sys.stderr,
)
return file_hash
@@ -324,14 +335,14 @@ class HydrusNetwork(Store):
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Hydrus database for files matching query.
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
@@ -366,7 +377,9 @@ class HydrusNetwork(Store):
return out
return []
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
def _iter_url_filtered_metadata(
url_value: str | None, want_any: bool, fetch_limit: int
) -> list[dict[str, Any]]:
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
# First try a fast system predicate if Hydrus supports it.
@@ -382,7 +395,11 @@ class HydrusNetwork(Store):
)
ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
if isinstance(ids, list):
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit()]
candidate_file_ids = [
int(x)
for x in ids
if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
]
except Exception:
candidate_file_ids = []
@@ -451,7 +468,7 @@ class HydrusNetwork(Store):
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = str(value or "").strip().lower().lstrip(".")
v = "".join(ch for ch in v if ch.isalnum())
return v
@@ -464,7 +481,7 @@ class HydrusNetwork(Store):
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
query = query_lower
if ext_filter and not query_lower:
query = "*"
@@ -486,21 +503,33 @@ class HydrusNetwork(Store):
pattern = pattern.strip()
if namespace == "url":
if not pattern or pattern == "*":
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
metadata_list = _iter_url_filtered_metadata(
None, want_any=True, fetch_limit=int(limit) if limit else 100
)
else:
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
try:
if pattern.startswith("http://") or pattern.startswith("https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": pattern})
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={"url": pattern},
)
response = client._perform_request(spec) # type: ignore[attr-defined]
hashes: list[str] = []
file_ids: list[int] = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get("file_hashes")
raw_hashes = response.get("hashes") or response.get(
"file_hashes"
)
if isinstance(raw_hashes, list):
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
hashes = [
str(h).strip()
for h in raw_hashes
if isinstance(h, str) and str(h).strip()
]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
@@ -518,7 +547,11 @@ class HydrusNetwork(Store):
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
metas = (
payload.get("metadata", [])
if isinstance(payload, dict)
else []
)
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
elif hashes:
@@ -530,7 +563,11 @@ class HydrusNetwork(Store):
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
metas = (
payload.get("metadata", [])
if isinstance(payload, dict)
else []
)
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
except Exception:
@@ -538,7 +575,9 @@ class HydrusNetwork(Store):
# Fallback: substring scan
if metadata_list is None:
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
metadata_list = _iter_url_filtered_metadata(
pattern, want_any=False, fetch_limit=int(limit) if limit else 100
)
# Parse the query into tags
# "*" means "match all" - use system:everything tag in Hydrus
@@ -553,7 +592,7 @@ class HydrusNetwork(Store):
if query.strip() == "*":
tags = ["system:everything"]
elif ':' in query_lower:
elif ":" in query_lower:
tags = [query_lower]
else:
freeform_union_search = True
@@ -566,7 +605,7 @@ class HydrusNetwork(Store):
# If we can't extract alnum terms, fall back to the raw query text.
title_predicates = [f"title:{query_lower}*"]
freeform_predicates = [f"{query_lower}*"]
# Search files with the tags (unless url: search already produced metadata)
results = []
@@ -584,7 +623,9 @@ class HydrusNetwork(Store):
except (TypeError, ValueError):
continue
if isinstance(raw_hashes, list):
hashes_out = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
hashes_out = [
str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()
]
return ids_out, hashes_out
if metadata_list is None:
@@ -635,9 +676,7 @@ class HydrusNetwork(Store):
return []
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
tags=tags, return_hashes=True, return_file_ids=True
)
file_ids, hashes = _extract_search_ids(search_result)
@@ -676,12 +715,12 @@ class HydrusNetwork(Store):
if not isinstance(meta, dict):
continue
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip('.')
ext = str(meta.get("ext") or "").strip().lstrip(".")
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip('.')
ext = str(info.get("ext", "")).strip().lstrip(".")
break
if ext:
break
@@ -696,6 +735,7 @@ class HydrusNetwork(Store):
all_tags: list[str] = []
title = f"Hydrus File {file_id}"
if isinstance(tags_set, dict):
def _collect(tag_list: Any) -> None:
nonlocal title
if not isinstance(tag_list, list):
@@ -708,7 +748,10 @@ class HydrusNetwork(Store):
if not tag_l:
continue
all_tags.append(tag_l)
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
if (
tag_l.startswith("title:")
and title == f"Hydrus File {file_id}"
):
title = tag_l.split(":", 1)[1].strip()
for _service_name, service_tags in tags_set.items():
@@ -807,77 +850,78 @@ class HydrusNetwork(Store):
metadata_list = []
for meta in metadata_list:
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
# Get tags for this file and extract title
tags_set = meta.get("tags", {})
all_tags = []
title = f"Hydrus File {file_id}" # Default fallback
all_tags_str = "" # For substring matching
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
def _collect(tag_list: Any) -> None:
nonlocal title, all_tags_str
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
tag_l = tag_text.strip().lower()
if not tag_l:
continue
all_tags.append(tag_l)
all_tags_str += " " + tag_l
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_l.split(":", 1)[1].strip()
if len(results) >= limit:
break
for _service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
# Get tags for this file and extract title
tags_set = meta.get("tags", {})
all_tags = []
title = f"Hydrus File {file_id}" # Default fallback
all_tags_str = "" # For substring matching
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
def _collect(tag_list: Any) -> None:
nonlocal title, all_tags_str
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
tag_l = tag_text.strip().lower()
if not tag_l:
continue
all_tags.append(tag_l)
all_tags_str += " " + tag_l
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_l.split(":", 1)[1].strip()
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
_collect(tag_list)
for _service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
continue
display_tags = service_tags.get("display_tags", [])
_collect(display_tags)
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
_collect(tag_list)
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
_collect(top_level_tags)
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip('.')
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip('.')
break
if ext:
display_tags = service_tags.get("display_tags", [])
_collect(display_tags)
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
_collect(top_level_tags)
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip(".")
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip(".")
break
if ext:
break
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ':' in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ":" in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append(
{
"hash": hash_hex,
"url": file_url,
"name": title,
@@ -885,27 +929,31 @@ class HydrusNetwork(Store):
"size": size,
"size_bytes": size,
"store": self.NAME,
"tag": all_tags,
"tag": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
else:
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
}
)
else:
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [
t for t in all_tags if isinstance(t, str) and t and (":" not in t)
]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
match = True
if query_lower != "*" and search_terms:
for term in search_terms:
if term not in searchable_text:
match = False
break
if match:
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
match = True
if query_lower != "*" and search_terms:
for term in search_terms:
if term not in searchable_text:
match = False
break
if match:
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append(
{
"hash": hash_hex,
"url": file_url,
"name": title,
@@ -917,8 +965,9 @@ class HydrusNetwork(Store):
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
}
)
debug(f"{prefix} {len(results)} result(s)")
if ext_filter:
wanted = ext_filter
@@ -936,6 +985,7 @@ class HydrusNetwork(Store):
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
raise
@@ -945,13 +995,15 @@ class HydrusNetwork(Store):
IMPORTANT: this method must be side-effect free (do not auto-open a browser).
Only explicit user actions (e.g. the get-file cmdlet) should open files.
"""
debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")
# Build browser URL with access key
base_url = str(self.URL).rstrip('/')
base_url = str(self.URL).rstrip("/")
access_key = str(self.API)
browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
browser_url = (
f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
)
debug(f"{self._log_prefix()} get_file: url={browser_url}")
return browser_url
@@ -972,7 +1024,9 @@ class HydrusNetwork(Store):
return False
reason = kwargs.get("reason")
reason_text = str(reason).strip() if isinstance(reason, str) and reason.strip() else None
reason_text = (
str(reason).strip() if isinstance(reason, str) and reason.strip() else None
)
# 1) Delete file
client.delete_files([file_hash], reason=reason_text)
@@ -990,10 +1044,10 @@ class HydrusNetwork(Store):
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
"""Get metadata for a file from Hydrus by hash.
Args:
file_hash: SHA256 hash of the file (64-char hex string)
Returns:
Dict with metadata fields or None if not found
"""
@@ -1002,7 +1056,7 @@ class HydrusNetwork(Store):
if not client:
debug(f"{self._log_prefix()} get_metadata: client unavailable")
return None
# Fetch file metadata with the fields we need for CLI display.
payload = client.fetch_file_metadata(
hashes=[file_hash],
@@ -1012,16 +1066,16 @@ class HydrusNetwork(Store):
include_size=True,
include_mime=True,
)
if not payload or not payload.get("metadata"):
return None
meta = payload["metadata"][0]
# Hydrus can return placeholder metadata rows for unknown hashes.
if not isinstance(meta, dict) or meta.get("file_id") is None:
return None
# Extract title from tags
title = f"Hydrus_{file_hash[:12]}"
tags_payload = meta.get("tags", {})
@@ -1038,10 +1092,12 @@ class HydrusNetwork(Store):
break
if title != f"Hydrus_{file_hash[:12]}":
break
# Hydrus may return mime as an int enum, or sometimes a human label.
mime_val = meta.get("mime")
filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")
filetype_human = (
meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")
)
# Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
# then title suffix, then file path suffix.
@@ -1113,19 +1169,16 @@ class HydrusNetwork(Store):
dur_int: int | None = int(dur_val) if dur_val is not None else None
except Exception:
dur_int = None
raw_urls = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
url_list: list[str] = []
if isinstance(raw_urls, str):
s = raw_urls.strip()
url_list = [s] if s else []
elif isinstance(raw_urls, list):
url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]
url_list = [
str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()
]
return {
"hash": file_hash,
@@ -1139,18 +1192,18 @@ class HydrusNetwork(Store):
"duration_ms": dur_int,
"url": url_list,
}
except Exception as exc:
debug(f"{self._log_prefix()} get_metadata failed: {exc}")
return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
"""Get tags for a file from Hydrus by hash.
Args:
file_identifier: File hash (SHA256 hex string)
**kwargs: Optional service_name parameter
Returns:
Tuple of (tags_list, source_description)
where source is always "hydrus"
@@ -1162,46 +1215,45 @@ class HydrusNetwork(Store):
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'")
return [], "unknown"
# Get Hydrus client and service info
client = self._client
if not client:
debug(f"{self._log_prefix()} get_tags: client unavailable")
return [], "unknown"
# Fetch file metadata
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=True,
include_file_url=False
hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=False
)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
debug(f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}")
return [], "unknown"
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict) or meta.get("file_id") is None:
debug(f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}")
return [], "unknown"
# Extract tags using service name
service_name = "my tags"
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
# Extract tags from metadata
tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus"
return [
str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()
], "hydrus"
except Exception as exc:
debug(f"{self._log_prefix()} get_tags failed: {exc}")
return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Add tags to a Hydrus file.
"""
"""Add tags to a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1214,7 +1266,11 @@ class HydrusNetwork(Store):
return False
service_name = kwargs.get("service_name") or "my tags"
incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
incoming_tags = [
str(t).strip().lower()
for t in (tags or [])
if isinstance(t, str) and str(t).strip()
]
if not incoming_tags:
return True
@@ -1225,7 +1281,9 @@ class HydrusNetwork(Store):
from metadata import compute_namespaced_tag_overwrite
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags)
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
existing_tags, incoming_tags
)
if not tags_to_add and not tags_to_remove:
return True
@@ -1250,8 +1308,7 @@ class HydrusNetwork(Store):
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Delete tags from a Hydrus file.
"""
"""Delete tags from a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1264,7 +1321,9 @@ class HydrusNetwork(Store):
return False
service_name = kwargs.get("service_name") or "my tags"
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()]
tag_list = [
str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()
]
if not tag_list:
return False
client.delete_tag(file_hash, tag_list, service_name)
@@ -1274,13 +1333,9 @@ class HydrusNetwork(Store):
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a Hydrus file.
"""
"""Get known url for a Hydrus file."""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} get_url: client unavailable")
return []
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
@@ -1292,12 +1347,7 @@ class HydrusNetwork(Store):
return []
meta = items[0] if isinstance(items[0], dict) else {}
raw_urls: Any = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
raw_urls: Any = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if isinstance(raw_urls, str):
val = raw_urls.strip()
return [val] if val else []
@@ -1316,8 +1366,7 @@ class HydrusNetwork(Store):
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Associate one or more url with a Hydrus file.
"""
"""Associate one or more url with a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1344,11 +1393,11 @@ class HydrusNetwork(Store):
return False
any_success = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
for u in (urls or []):
for u in urls or []:
s = str(u or "").strip()
if not s:
continue
@@ -1363,8 +1412,7 @@ class HydrusNetwork(Store):
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete one or more url from a Hydrus file.
"""
"""Delete one or more url from a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1453,35 +1501,35 @@ class HydrusNetwork(Store):
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str, Any],
service_key: Optional[str],
service_name: str
meta: Dict[str, Any], service_key: Optional[str], service_name: str
) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
Prefers display_tags (includes siblings/parents, excludes deleted).
Falls back to storage_tags status '0' (current).
"""
tags_payload = meta.get("tags")
if not isinstance(tags_payload, dict):
return []
svc_data = None
if service_key:
svc_data = tags_payload.get(service_key)
if not isinstance(svc_data, dict):
return []
# Prefer display_tags (Hydrus computes siblings/parents)
display = svc_data.get("display_tags")
if isinstance(display, list) and display:
return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()]
# Fallback to storage_tags status '0' (current)
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]
return [
str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()
]
return []
+6 -4
View File
@@ -57,7 +57,7 @@ class Store(ABC):
Default behavior is to call add_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
try:
ok = self.add_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
@@ -72,7 +72,7 @@ class Store(ABC):
Default behavior is to call delete_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
try:
ok = self.delete_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
@@ -87,7 +87,7 @@ class Store(ABC):
Default behavior is to call set_note() per file.
"""
changed_any = False
for file_identifier, name, text in (items or []):
for file_identifier, name, text in items or []:
try:
ok = self.set_note(file_identifier, name, text, **kwargs)
changed_any = changed_any or bool(ok)
@@ -112,7 +112,9 @@ class Store(ABC):
"""Add or replace a named note for a file."""
raise NotImplementedError
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Optional hook for handling `@N` selection semantics.
Return True if the selection was handled and default behavior should be skipped.
+16 -5
View File
@@ -88,7 +88,9 @@ def _required_keys_for(store_cls: Type[BaseStore]) -> list[str]:
raise TypeError(f"Unsupported __new__.keys type for {store_cls.__name__}: {type(keys)}")
def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_config: Any) -> Dict[str, Any]:
def _build_kwargs(
store_cls: Type[BaseStore], instance_name: str, instance_config: Any
) -> Dict[str, Any]:
if isinstance(instance_config, dict):
cfg_dict = dict(instance_config)
else:
@@ -97,7 +99,10 @@ def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_confi
required = _required_keys_for(store_cls)
# If NAME is required but not present, allow the instance key to provide it.
if any(_normalize_config_key(k) == "NAME" for k in required) and _get_case_insensitive(cfg_dict, "NAME") is None:
if (
any(_normalize_config_key(k) == "NAME" for k in required)
and _get_case_insensitive(cfg_dict, "NAME") is None
):
cfg_dict["NAME"] = str(instance_name)
kwargs: Dict[str, Any] = {}
@@ -116,14 +121,18 @@ def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_confi
class Store:
def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None:
def __init__(
self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False
) -> None:
self._config = config or {}
self._suppress_debug = suppress_debug
self._backends: Dict[str, BaseStore] = {}
self._backend_errors: Dict[str, str] = {}
self._load_backends()
def _maybe_register_temp_alias(self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore) -> None:
def _maybe_register_temp_alias(
self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore
) -> None:
"""If a folder backend points at config['temp'], also expose it as the 'temp' backend.
This keeps config compatibility (e.g. existing 'default') while presenting the temp
@@ -236,7 +245,9 @@ class Store:
def __getitem__(self, backend_name: str) -> BaseStore:
if backend_name not in self._backends:
raise KeyError(f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}")
raise KeyError(
f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}"
)
return self._backends[backend_name]
def is_available(self, backend_name: str) -> bool:
+20 -13
View File
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
class AccessModal(ModalScreen):
"""Modal to display a file/URL that can be accessed from phone browser."""
CSS = """
Screen {
align: center middle;
@@ -65,10 +65,10 @@ class AccessModal(ModalScreen):
margin-right: 1;
}
"""
def __init__(self, title: str, content: str, is_url: bool = False):
"""Initialize access modal.
Args:
title: Title of the item being accessed
content: The URL or file path
@@ -78,20 +78,20 @@ class AccessModal(ModalScreen):
self.item_title = title
self.item_content = content
self.is_url = is_url
def compose(self) -> ComposeResult:
"""Create the modal layout."""
with Container(id="access-container"):
with Vertical(id="access-header"):
yield Label(f"[bold]{self.item_title}[/bold]")
yield Label("[dim]Click link below to open in your browser[/dim]")
with Vertical(id="access-content"):
if self.is_url:
yield Label("[bold cyan]Link:[/bold cyan]", classes="access-label")
else:
yield Label("[bold cyan]File:[/bold cyan]", classes="access-label")
# Display as clickable link using HTML link element for web mode
# Rich link markup `[link=URL]` has parsing issues with url containing special chars
# Instead, use the HTML link markup that Textual-serve renders as <a> tag
@@ -99,16 +99,19 @@ class AccessModal(ModalScreen):
link_text = f'[link="{self.item_content}"]Open in Browser[/link]'
content_box = Static(link_text, classes="access-url")
yield content_box
# Also show the URL for reference/copying
yield Label(self.item_content, classes="access-label")
yield Label("\n[yellow]↑ Click the link above to open on your device[/yellow]", classes="access-label")
yield Label(
"\n[yellow]↑ Click the link above to open on your device[/yellow]",
classes="access-label",
)
with Horizontal(id="access-footer"):
yield Button("Copy URL", id="copy-btn", variant="primary")
yield Button("Close", id="close-btn", variant="default")
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
if event.button.id == "copy-btn":
@@ -118,19 +121,23 @@ class AccessModal(ModalScreen):
# Try to use pyperclip if available
try:
import pyperclip
pyperclip.copy(self.item_content)
logger.info("URL copied to clipboard via pyperclip")
except ImportError:
# Fallback: try xclip on Linux or pbcopy on Mac
import subprocess
import sys
if sys.platform == "win32":
# Windows: use clipboard via pyperclip (already tried)
logger.debug("Windows clipboard not available without pyperclip")
else:
# Linux/Mac
process = subprocess.Popen(['xclip', '-selection', 'clipboard'], stdin=subprocess.PIPE)
process.communicate(self.item_content.encode('utf-8'))
process = subprocess.Popen(
["xclip", "-selection", "clipboard"], stdin=subprocess.PIPE
)
process.communicate(self.item_content.encode("utf-8"))
logger.info("URL copied to clipboard via xclip")
except Exception as e:
logger.debug(f"Clipboard copy not available: {e}")
File diff suppressed because it is too large Load Diff
+212 -141
View File
@@ -23,16 +23,21 @@ logger = logging.getLogger(__name__)
class ExportModal(ModalScreen):
"""Modal screen for exporting files with metadata and tags."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
]
CSS_PATH = "export.tcss"
def __init__(self, result_data: Optional[dict] = None, hydrus_available: bool = False, debrid_available: bool = False):
def __init__(
self,
result_data: Optional[dict] = None,
hydrus_available: bool = False,
debrid_available: bool = False,
):
"""Initialize the export modal with result data.
Args:
result_data: Dictionary containing:
- title: str - Item title
@@ -57,22 +62,22 @@ class ExportModal(ModalScreen):
self.file_ext: Optional[str] = None # Store the file extension for format filtering
self.file_type: Optional[str] = None # Store the file type (audio, video, image, document)
self.default_format: Optional[str] = None # Store the default format to set after mount
def _determine_file_type(self, ext: str) -> tuple[str, list]:
"""Determine file type from extension and return type and format options.
Args:
ext: File extension (e.g., '.mp3', '.mp4', '.jpg')
Returns:
Tuple of (file_type, format_options) where format_options is a list of (label, value) tuples
"""
ext_lower = ext.lower() if ext else ''
ext_lower = ext.lower() if ext else ""
from SYS.utils_constant import mime_maps
found_type = "unknown"
# Find type based on extension
for category, formats in mime_maps.items():
for fmt_key, fmt_info in formats.items():
@@ -81,13 +86,13 @@ class ExportModal(ModalScreen):
break
if found_type != "unknown":
break
# Build format options for the found type
format_options = []
# If unknown, fallback to audio (matching legacy behavior)
target_type = found_type if found_type in mime_maps else "audio"
if target_type in mime_maps:
# Sort formats alphabetically
sorted_formats = sorted(mime_maps[target_type].items())
@@ -95,15 +100,21 @@ class ExportModal(ModalScreen):
label = fmt_key.upper()
value = fmt_key
format_options.append((label, value))
return (target_type, format_options)
def _get_library_options(self) -> list:
"""Get available library options from config.conf."""
options = [("Local", "local")]
try:
from config import load_config, get_hydrus_access_key, get_hydrus_url, get_debrid_api_key
from config import (
load_config,
get_hydrus_access_key,
get_hydrus_url,
get_debrid_api_key,
)
config = load_config()
hydrus_url = (get_hydrus_url(config, "home") or "").strip()
@@ -114,29 +125,39 @@ class ExportModal(ModalScreen):
debrid_api_key = get_debrid_api_key(config)
if self.debrid_available and debrid_api_key:
options.append(("Debrid", "debrid"))
except Exception as e:
logger.error(f"Error loading config for libraries: {e}")
return options
def _get_metadata_text(self) -> str:
"""Format metadata from result data in a consistent display format."""
metadata = self.result_data.get('metadata', {})
source = self.result_data.get('source', 'unknown')
logger.info(f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}")
metadata = self.result_data.get("metadata", {})
source = self.result_data.get("source", "unknown")
logger.info(
f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}"
)
if not metadata:
logger.info(f"_get_metadata_text - No metadata found, returning 'No metadata available'")
logger.info(
f"_get_metadata_text - No metadata found, returning 'No metadata available'"
)
return "No metadata available"
lines = []
# Only display these specific fields in this order
display_fields = [
'duration', 'size', 'ext', 'media_type', 'time_imported', 'time_modified', 'hash'
"duration",
"size",
"ext",
"media_type",
"time_imported",
"time_modified",
"hash",
]
# Display fields in a consistent order
for field in display_fields:
if field in metadata:
@@ -147,9 +168,9 @@ class ExportModal(ModalScreen):
# Use central formatting rule
formatted_value = format_metadata_value(field, value)
# Format: "Field Name: value"
field_label = field.replace('_', ' ').title()
field_label = field.replace("_", " ").title()
lines.append(f"{field_label}: {formatted_value}")
# If we found any fields, display them
if lines:
logger.info(f"_get_metadata_text - Returning {len(lines)} formatted metadata lines")
@@ -157,12 +178,12 @@ class ExportModal(ModalScreen):
else:
logger.info(f"_get_metadata_text - No matching fields found in metadata")
return "No metadata available"
def compose(self) -> ComposeResult:
"""Compose the export modal screen."""
with Container(id="export-container"):
yield Static("Export File with Metadata", id="export-title")
# Row 1: Three columns (Tag, Metadata, Export-To Options)
self.tags_textarea = TextArea(
text=self._format_tags(),
@@ -171,7 +192,7 @@ class ExportModal(ModalScreen):
)
yield self.tags_textarea
self.tags_textarea.border_title = "Tag"
# Metadata display instead of files tree
self.metadata_display = Static(
self._get_metadata_text(),
@@ -179,120 +200,128 @@ class ExportModal(ModalScreen):
)
yield self.metadata_display
self.metadata_display.border = ("solid", "dodgerblue")
# Right column: Export options
with Vertical(id="export-options"):
# Export To selector
self.export_to_select = Select(
[("0x0", "0x0"), ("Libraries", "libraries"), ("Custom Path", "path")],
id="export-to-select"
id="export-to-select",
)
yield self.export_to_select
# Libraries selector (initially hidden)
library_options = self._get_library_options()
self.libraries_select = Select(
library_options,
id="libraries-select"
)
self.libraries_select = Select(library_options, id="libraries-select")
yield self.libraries_select
# Custom path input (initially hidden)
self.custom_path_input = Input(
placeholder="Enter custom export path",
id="custom-path-input"
placeholder="Enter custom export path", id="custom-path-input"
)
yield self.custom_path_input
# Get metadata for size and format options
metadata = self.result_data.get('metadata', {})
original_size = metadata.get('size', '')
ext = metadata.get('ext', '')
metadata = self.result_data.get("metadata", {})
original_size = metadata.get("size", "")
ext = metadata.get("ext", "")
# Store the extension and determine file type
self.file_ext = ext
self.file_type, format_options = self._determine_file_type(ext)
# Format size in MB for display
if original_size:
size_mb = int(original_size / (1024 * 1024)) if isinstance(original_size, (int, float)) else original_size
size_mb = (
int(original_size / (1024 * 1024))
if isinstance(original_size, (int, float))
else original_size
)
size_display = f"{size_mb}Mb"
else:
size_display = ""
# Size input
self.size_input = Input(
value=size_display,
placeholder="Size (can reduce)",
id="size-input",
disabled=(self.file_type == 'document') # Disable for documents - no resizing needed
disabled=(
self.file_type == "document"
), # Disable for documents - no resizing needed
)
yield self.size_input
# Determine the default format value (match current extension to format options)
default_format = None
if ext and format_options:
# Map extension to format value (e.g., .flac -> "flac", .mp3 -> "mp3", .m4a -> "m4a")
ext_lower = ext.lower().lstrip('.') # Remove leading dot if present
ext_lower = ext.lower().lstrip(".") # Remove leading dot if present
# Try to find matching format option
for _, value in format_options:
if value and (ext_lower == value or f".{ext_lower}" == ext or ext.endswith(f".{value}")):
if value and (
ext_lower == value
or f".{ext_lower}" == ext
or ext.endswith(f".{value}")
):
default_format = value
logger.debug(f"Matched extension {ext} to format {value}")
break
# If no exact match, use first option
if not default_format and format_options:
default_format = format_options[0][1]
logger.debug(f"No format match for {ext}, using first option: {default_format}")
logger.debug(
f"No format match for {ext}, using first option: {default_format}"
)
# Store the default format to apply after mount
self.default_format = default_format
# Format selector based on file type
self.format_select = Select(
format_options if format_options else [("No conversion", "")],
id="format-select",
disabled=not format_options # Disable if no format options (e.g., documents)
disabled=not format_options, # Disable if no format options (e.g., documents)
)
yield self.format_select
# Row 2: Buttons
with Horizontal(id="export-buttons"):
yield Button("Cancel", id="cancel-btn", variant="default")
yield Button("Export", id="export-btn", variant="primary")
def _format_tags(self) -> str:
"""Format tags from result data."""
tags = self.result_data.get('tags', '')
tags = self.result_data.get("tags", "")
if isinstance(tags, str):
# Split by comma and rejoin with newlines
tags_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
return '\n'.join(tags_list)
tags_list = [tag.strip() for tag in tags.split(",") if tag.strip()]
return "\n".join(tags_list)
elif isinstance(tags, list):
return '\n'.join(tags)
return ''
return "\n".join(tags)
return ""
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button press events."""
button_id = event.button.id
if button_id == "export-btn":
self._handle_export()
elif button_id == "cancel-btn":
self.action_cancel()
def on_select_changed(self, event: Select.Changed) -> None:
"""Handle select widget changes."""
if event.control.id == "export-to-select":
# Show/hide custom path and libraries based on selection
if self.custom_path_input:
self.custom_path_input.display = (event.value == "path")
self.custom_path_input.display = event.value == "path"
if self.libraries_select:
self.libraries_select.display = (event.value == "libraries")
self.libraries_select.display = event.value == "libraries"
elif event.control.id == "libraries-select":
# Handle library selection (no special action needed currently)
logger.debug(f"Library selected: {event.value}")
def on_mount(self) -> None:
"""Handle mount event."""
# Initially hide custom path and libraries inputs (default is "0x0")
@@ -300,24 +329,27 @@ class ExportModal(ModalScreen):
self.custom_path_input.display = False
if self.libraries_select:
self.libraries_select.display = False
# Set the default format value to show it selected instead of "Select"
if self.default_format and self.format_select:
self.format_select.value = self.default_format
logger.debug(f"Set format selector to default value: {self.default_format}")
# Refresh metadata display after mount to ensure data is loaded
if self.metadata_display:
metadata_text = self._get_metadata_text()
self.metadata_display.update(metadata_text)
logger.debug(f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}")
logger.debug(
f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}"
)
def _handle_export(self) -> None:
"""Handle the export action."""
try:
tags_text = self.tags_textarea.text.strip()
export_to = self.export_to_select.value if self.export_to_select else "0x0"
custom_path = self.custom_path_input.value.strip() if self.custom_path_input else ""
# Get library value - handle Select.BLANK case
library = "local" # default
if self.libraries_select and str(self.libraries_select.value) != "Select.BLANK":
@@ -331,55 +363,87 @@ class ExportModal(ModalScreen):
library = options[0][1] # Get the value part of first option tuple
except Exception:
library = "local"
size = self.size_input.value.strip() if self.size_input else ""
file_format = self.format_select.value if self.format_select else "mp4"
# Parse tags from textarea (one per line)
export_tags = set()
for line in tags_text.split('\n'):
for line in tags_text.split("\n"):
tag = line.strip()
if tag:
export_tags.add(tag)
# For Hydrus export, filter out metadata-only tags (hash:, url:, relationship:)
if export_to == "libraries" and library == "hydrus":
metadata_prefixes = {'hash:', 'url:', 'relationship:'}
export_tags = {tag for tag in export_tags if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)}
logger.info(f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining")
metadata_prefixes = {"hash:", "url:", "relationship:"}
export_tags = {
tag
for tag in export_tags
if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)
}
logger.info(
f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining"
)
# Extract title and add as searchable tags if not already present
title = self.result_data.get('title', '').strip()
title = self.result_data.get("title", "").strip()
if title:
# Add the full title as a tag if not already present
title_tag = f"title:{title}"
if title_tag not in export_tags and not any(t.startswith('title:') for t in export_tags):
if title_tag not in export_tags and not any(
t.startswith("title:") for t in export_tags
):
export_tags.add(title_tag)
# Extract individual words from title as searchable tags (if reasonable length)
# Skip very short words and common stop words
if len(title) < 100: # Only for reasonably short titles
stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'in', 'to', 'for', 'is', 'it', 'at', 'by', 'from', 'with', 'as', 'be', 'on', 'that', 'this', 'this'}
stop_words = {
"the",
"a",
"an",
"and",
"or",
"of",
"in",
"to",
"for",
"is",
"it",
"at",
"by",
"from",
"with",
"as",
"be",
"on",
"that",
"this",
"this",
}
words = title.lower().split()
for word in words:
# Clean up word (remove punctuation)
clean_word = ''.join(c for c in word if c.isalnum())
clean_word = "".join(c for c in word if c.isalnum())
# Only add if not a stop word and has some length
if clean_word and len(clean_word) > 2 and clean_word not in stop_words:
if clean_word not in export_tags:
export_tags.add(clean_word)
logger.info(f"Extracted {len(words)} words from title, added searchable title tags")
logger.info(
f"Extracted {len(words)} words from title, added searchable title tags"
)
# Validate required fields - allow export to continue for Hydrus even with 0 actual tags
# (metadata tags will still be in the sidecar, and tags can be added later)
if not export_tags and export_to != "libraries":
logger.warning("No tags provided for export")
return
if export_to == "libraries" and not export_tags:
logger.warning("No actual tags for Hydrus export (only metadata was present)")
# Don't return - allow export to continue, file will be added to Hydrus even without tags
# Determine export path
export_path = None
if export_to == "path":
@@ -391,61 +455,62 @@ class ExportModal(ModalScreen):
export_path = library # "local", "hydrus", "debrid"
else:
export_path = export_to # "0x0"
# Get metadata from result_data
metadata = self.result_data.get('metadata', {})
metadata = self.result_data.get("metadata", {})
# Extract file source info from result_data (passed by hub-ui)
file_hash = self.result_data.get('hash')
file_url = self.result_data.get('url')
file_path = self.result_data.get('path')
source = self.result_data.get('source', 'unknown')
file_hash = self.result_data.get("hash")
file_url = self.result_data.get("url")
file_path = self.result_data.get("path")
source = self.result_data.get("source", "unknown")
# Prepare export data
export_data = {
'export_to': export_to,
'export_path': export_path,
'library': library if export_to == "libraries" else None,
'tags': export_tags,
'size': size if size else None,
'format': file_format,
'metadata': metadata,
'original_data': self.result_data,
'hash': file_hash,
'url': file_url,
'path': file_path,
'source': source,
"export_to": export_to,
"export_path": export_path,
"library": library if export_to == "libraries" else None,
"tags": export_tags,
"size": size if size else None,
"format": file_format,
"metadata": metadata,
"original_data": self.result_data,
"hash": file_hash,
"url": file_url,
"path": file_path,
"source": source,
}
logger.info(f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}")
logger.info(
f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}"
)
# Dismiss the modal and return the export data
self.dismiss(export_data)
except Exception as e:
logger.error(f"Error during export: {e}", exc_info=True)
def action_cancel(self) -> None:
    """Close the modal without exporting.

    Dismisses the screen with ``None`` as its result. Reached through the
    ``escape`` key binding declared in ``BINDINGS`` and through the Cancel
    button handled in ``on_button_pressed``.
    """
    self.dismiss(None)
def create_notes_sidecar(file_path: Path, notes: str) -> None:
"""Create a .notes sidecar file with notes text.
Only creates file if notes are not empty.
Args:
file_path: Path to the exported file
notes: Notes text
"""
if not notes or not notes.strip():
return
notes_path = file_path.with_suffix(file_path.suffix + '.notes')
notes_path = file_path.with_suffix(file_path.suffix + ".notes")
try:
with open(notes_path, 'w', encoding='utf-8') as f:
with open(notes_path, "w", encoding="utf-8") as f:
f.write(notes.strip())
logger.info(f"Created notes sidecar: {notes_path}")
except Exception as e:
@@ -454,50 +519,56 @@ def create_notes_sidecar(file_path: Path, notes: str) -> None:
def determine_needs_conversion(current_ext: str, target_format: str) -> bool:
    """Determine if conversion is needed between two formats.

    Args:
        current_ext: Current file extension (e.g., '.flac'). The leading
            dot is optional and case is ignored. ``None`` or an empty
            string is treated as "no known extension".
        target_format: Target format name (e.g., 'mp3'), or an empty value
            / NoSelection object when the user chose no format.

    Returns:
        True if conversion is needed, False if no target was requested or
        the file is already in the target format.
    """
    # An empty/None target or a NoSelection sentinel means nothing was
    # requested. The sentinel is matched by class name so this helper does
    # not need to import the widget library.
    if not target_format or type(target_format).__name__ == "NoSelection":
        return False

    # Normalise both sides: case-insensitive, extension without its dot.
    # `or ""` keeps a missing extension from raising AttributeError.
    current_ext_lower = (current_ext or "").lower().lstrip(".")
    target_format_lower = str(target_format).lower()

    return current_ext_lower != target_format_lower
def calculate_size_tolerance(metadata: dict, user_size_mb: Optional[str]) -> tuple[Optional[int], Optional[int]]:
def calculate_size_tolerance(
metadata: dict, user_size_mb: Optional[str]
) -> tuple[Optional[int], Optional[int]]:
"""Calculate target size with 1MB grace period.
Args:
metadata: File metadata containing 'size' in bytes
user_size_mb: User-entered size like "756Mb" or empty string
Returns:
Tuple of (target_bytes, grace_bytes) where grace_bytes is 1MB (1048576);
target_bytes is None (with grace_bytes still 1MB) if no size specified
"""
grace_bytes = 1 * 1024 * 1024 # 1MB grace period
if not user_size_mb or not user_size_mb.strip():
return None, grace_bytes
try:
# Parse the size string (format like "756Mb")
size_str = user_size_mb.strip().lower()
if size_str.endswith('mb'):
if size_str.endswith("mb"):
size_str = size_str[:-2]
elif size_str.endswith('m'):
elif size_str.endswith("m"):
size_str = size_str[:-1]
size_mb = float(size_str)
target_bytes = int(size_mb * 1024 * 1024)
return target_bytes, grace_bytes
+87 -80
View File
@@ -23,24 +23,25 @@ logger = logging.getLogger(__name__)
class SearchModal(ModalScreen):
"""Modal screen for searching OpenLibrary and Soulseek."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
Binding("enter", "search_focused", "Search"),
Binding("ctrl+t", "scrape_tags", "Scrape Tags"),
]
CSS_PATH = "search.tcss"
class SearchSelected(Message):
    """Posted when user selects a search result."""

    def __init__(self, result: dict) -> None:
        """Attach the selected result to the message.

        Args:
            result: The chosen search result as a dict, exposed to
                handlers as ``self.result``.
        """
        self.result = result
        super().__init__()
def __init__(self, app_instance=None):
"""Initialize the search modal.
Args:
app_instance: Reference to the main App instance for worker creation
"""
@@ -55,108 +56,98 @@ class SearchModal(ModalScreen):
self.current_result_table: Optional[ResultTable] = None
self.is_searching = False
self.current_worker = None # Track worker for search operations
def compose(self) -> ComposeResult:
"""Create child widgets for the search modal."""
with Vertical(id="search-container"):
yield Static("Search Books & Music", id="search-title")
with Horizontal(id="search-controls"):
# Source selector
self.source_select = Select(
[("OpenLibrary", "openlibrary"), ("Soulseek", "soulseek")],
value="openlibrary",
id="source-select"
id="source-select",
)
yield self.source_select
# Search input
self.search_input = Input(
placeholder="Enter search query...",
id="search-input"
)
self.search_input = Input(placeholder="Enter search query...", id="search-input")
yield self.search_input
# Search button
yield Button("Search", id="search-button", variant="primary")
# Results table
self.results_table = DataTable(id="results-table")
yield self.results_table
# Two-column layout: tags on left, source/submit on right
with Horizontal(id="bottom-controls"):
# Left column: Tags textarea
with Vertical(id="tags-column"):
self.tags_textarea = TextArea(
text="",
id="result-tags-textarea",
read_only=False
text="", id="result-tags-textarea", read_only=False
)
self.tags_textarea.border_title = "Tags [Ctrl+T: Scrape]"
yield self.tags_textarea
# Right column: Library source and submit button
with Vertical(id="source-submit-column"):
# Library source selector (for OpenLibrary results)
self.library_source_select = Select(
[("Local", "local"), ("Download", "download")],
value="local",
id="library-source-select"
id="library-source-select",
)
yield self.library_source_select
# Submit button
yield Button("Submit", id="submit-button", variant="primary")
# Buttons at bottom
with Horizontal(id="search-buttons"):
yield Button("Select", id="select-button", variant="primary")
yield Button("Download", id="download-button", variant="primary")
yield Button("Cancel", id="cancel-button", variant="default")
def on_mount(self) -> None:
"""Set up the table columns and focus."""
# Set up results table columns
self.results_table.add_columns(
"Title",
"Author/Artist",
"Year/Album",
"Details"
)
self.results_table.add_columns("Title", "Author/Artist", "Year/Album", "Details")
# Focus on search input
self.search_input.focus()
async def _perform_search(self) -> None:
"""Perform the actual search based on selected source."""
if not self.search_input or not self.source_select or not self.results_table:
logger.error("[search-modal] Widgets not initialized")
return
query = self.search_input.value.strip()
if not query:
logger.warning("[search-modal] Empty search query")
return
source = self.source_select.value
# Clear existing results
self.results_table.clear(columns=True)
self.current_results = []
self.current_result_table = None
self.is_searching = True
# Create worker for tracking
if self.app_instance and hasattr(self.app_instance, 'create_worker'):
if self.app_instance and hasattr(self.app_instance, "create_worker"):
self.current_worker = self.app_instance.create_worker(
source,
title=f"{source.capitalize()} Search: {query[:40]}",
description=f"Searching {source} for: {query}"
description=f"Searching {source} for: {query}",
)
self.current_worker.log_step(f"Connecting to {source}...")
try:
provider = get_search_provider(source)
if not provider:
@@ -168,10 +159,10 @@ class SearchModal(ModalScreen):
logger.info(f"[search-modal] Searching {source} for: {query}")
results = provider.search(query, limit=20)
self.current_results = results
if self.current_worker:
self.current_worker.log_step(f"Found {len(results)} results")
# Create ResultTable
table = ResultTable(f"Search Results: {query}")
for res in results:
@@ -183,10 +174,16 @@ class SearchModal(ModalScreen):
else:
# Fallback if no columns defined
row.add_column("Title", res.title)
row.add_column("Target", getattr(res, 'path', None) or getattr(res, 'url', None) or getattr(res, 'target', None) or '')
row.add_column(
"Target",
getattr(res, "path", None)
or getattr(res, "url", None)
or getattr(res, "target", None)
or "",
)
self.current_result_table = table
# Populate UI
if table.rows:
# Add headers
@@ -198,27 +195,27 @@ class SearchModal(ModalScreen):
else:
self.results_table.add_columns("Message")
self.results_table.add_row("No results found")
# Finish worker
if self.current_worker:
self.current_worker.finish("completed", f"Found {len(results)} results")
except Exception as e:
logger.error(f"[search-modal] Search error: {e}", exc_info=True)
if self.current_worker:
self.current_worker.finish("error", f"Search failed: {str(e)}")
finally:
self.is_searching = False
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
button_id = event.button.id
if button_id == "search-button":
# Run search asynchronously
asyncio.create_task(self._perform_search())
elif button_id == "select-button":
# Get selected row and populate tags textarea
if self.results_table and self.results_table.row_count > 0:
@@ -229,7 +226,7 @@ class SearchModal(ModalScreen):
self._populate_tags_from_result(result)
else:
logger.warning("[search-modal] No results to select")
elif button_id == "download-button":
# Download the selected result
if self.current_results and self.results_table.row_count > 0:
@@ -239,67 +236,75 @@ class SearchModal(ModalScreen):
if getattr(result, "table", "") == "openlibrary":
asyncio.create_task(self._download_book(result))
else:
logger.warning("[search-modal] Download only supported for OpenLibrary results")
logger.warning(
"[search-modal] Download only supported for OpenLibrary results"
)
else:
logger.warning("[search-modal] No result selected for download")
elif button_id == "submit-button":
# Submit the current result with tags and source
if self.current_results and self.results_table.row_count > 0:
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
# Convert to dict if needed for submission
if hasattr(result, 'to_dict'):
if hasattr(result, "to_dict"):
result_dict = result.to_dict()
else:
result_dict = result
# Get tags from textarea
tags_text = self.tags_textarea.text if self.tags_textarea else ""
# Get library source (if OpenLibrary)
library_source = self.library_source_select.value if self.library_source_select else "local"
library_source = (
self.library_source_select.value if self.library_source_select else "local"
)
# Add tags and source to result
result_dict["tags_text"] = tags_text
result_dict["library_source"] = library_source
# Post message and dismiss
self.post_message(self.SearchSelected(result_dict))
self.dismiss(result_dict)
else:
logger.warning("[search-modal] No result selected for submission")
elif button_id == "cancel-button":
self.dismiss(None)
def _populate_tags_from_result(self, result: Any) -> None:
"""Populate the tags textarea from a selected result."""
if not self.tags_textarea:
return
# Handle both SearchResult objects and dicts
if hasattr(result, 'full_metadata'):
if hasattr(result, "full_metadata"):
metadata = result.full_metadata or {}
source = result.table
title = result.title
else:
# Handle dict (legacy or from to_dict)
if 'full_metadata' in result:
metadata = result['full_metadata'] or {}
elif 'raw_data' in result:
metadata = result['raw_data'] or {}
if "full_metadata" in result:
metadata = result["full_metadata"] or {}
elif "raw_data" in result:
metadata = result["raw_data"] or {}
else:
metadata = result
source = result.get('table', '')
title = result.get('title', '')
source = result.get("table", "")
title = result.get("title", "")
# Format tags based on result source
if source == "openlibrary":
# For OpenLibrary: title, author, year
author = ", ".join(metadata.get("authors", [])) if isinstance(metadata.get("authors"), list) else metadata.get("authors", "")
author = (
", ".join(metadata.get("authors", []))
if isinstance(metadata.get("authors"), list)
else metadata.get("authors", "")
)
year = str(metadata.get("year", ""))
tags = []
if title:
@@ -325,10 +330,10 @@ class SearchModal(ModalScreen):
# Generic fallback
tags = [title]
tags_text = "\n".join(tags)
self.tags_textarea.text = tags_text
logger.info(f"[search-modal] Populated tags textarea from result")
async def _download_book(self, result: Any) -> None:
"""Download a book from OpenLibrary using the provider."""
if getattr(result, "table", "") != "openlibrary":
@@ -355,12 +360,12 @@ class SearchModal(ModalScreen):
except Exception as e:
logger.error(f"[search-modal] Download error: {e}", exc_info=True)
def action_search_focused(self) -> None:
"""Action for Enter key - only search if search input is focused."""
if self.search_input and self.search_input.has_focus and not self.is_searching:
asyncio.create_task(self._perform_search())
def action_scrape_tags(self) -> None:
"""Action for Ctrl+T - populate tags from selected result."""
if self.current_results and self.results_table and self.results_table.row_count > 0:
@@ -369,18 +374,20 @@ class SearchModal(ModalScreen):
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
self._populate_tags_from_result(result)
logger.info(f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}")
logger.info(
f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}"
)
else:
logger.warning(f"[search-modal] Ctrl+T: Invalid row index {selected_row}")
except Exception as e:
logger.error(f"[search-modal] Ctrl+T error: {e}")
else:
logger.warning("[search-modal] Ctrl+T: No results selected")
def action_cancel(self) -> None:
    """Action for Escape key - close the modal, dismissing it with ``None``."""
    self.dismiss(None)
def on_input_submitted(self, event: Input.Submitted) -> None:
"""Handle Enter key in search input - only trigger search here."""
if event.input.id == "search-input":
+210 -175
View File
@@ -19,28 +19,30 @@ logger = logging.getLogger(__name__)
class WorkersModal(ModalScreen):
"""Modal screen for monitoring running and finished workers."""
BINDINGS = [
Binding("escape", "cancel", "Cancel"),
]
CSS_PATH = "workers.tcss"
class WorkerUpdated(Message):
    """Posted when worker list is updated."""

    def __init__(self, workers: List[Dict[str, Any]]) -> None:
        # Store the payload before delegating to the base message initializer.
        self.workers = workers
        super().__init__()
class WorkerCancelled(Message):
    """Posted when user cancels a worker."""

    def __init__(self, worker_id: str) -> None:
        # Store the payload before delegating to the base message initializer.
        self.worker_id = worker_id
        super().__init__()
def __init__(self, app_instance=None):
"""Initialize the workers modal.
Args:
app_instance: Reference to the hub app for accessing worker info
"""
@@ -53,7 +55,7 @@ class WorkersModal(ModalScreen):
self.finished_workers: List[Dict[str, Any]] = []
self.selected_worker_id: Optional[str] = None
self.show_running = False # Start with finished tab
def compose(self) -> ComposeResult:
"""Create child widgets for the workers modal."""
with Vertical(id="workers-container"):
@@ -62,110 +64,107 @@ class WorkersModal(ModalScreen):
yield Static("Workers Monitor", id="workers-title")
yield Button("Running", id="toggle-running-btn", variant="primary")
yield Button("Finished", id="toggle-finished-btn", variant="default")
# Running tab content (initially hidden)
with Vertical(id="running-section"):
self.running_table = DataTable(id="running-table")
yield self.running_table
with Horizontal(id="running-controls"):
yield Button("Refresh", id="running-refresh-btn", variant="primary")
yield Button("Stop Selected", id="running-stop-btn", variant="warning")
yield Button("Stop All", id="running-stop-all-btn", variant="error")
# Finished tab content (initially visible)
with Vertical(id="finished-section"):
self.finished_table = DataTable(id="finished-table")
yield self.finished_table
with Horizontal(id="finished-controls"):
yield Button("Refresh", id="finished-refresh-btn", variant="primary")
yield Button("Clear Selected", id="finished-clear-btn", variant="warning")
yield Button("Clear All", id="finished-clear-all-btn", variant="error")
# Shared textarea for displaying worker logs
with Vertical(id="logs-section"):
yield Static("Worker Logs:", id="logs-label")
self.stdout_display = TextArea(id="stdout-display", read_only=True)
yield self.stdout_display
with Horizontal(id="workers-buttons"):
yield Button("Close", id="close-btn", variant="primary")
def on_mount(self) -> None:
    """Set up the tables and load worker data.

    Adds the column headers to the running and finished tables (when they
    exist), applies the initial section visibility, and performs one
    refresh of the worker data.
    """
    # Set up running workers table
    if self.running_table:
        self.running_table.add_columns(
            "ID", "Type", "Status", "Pipe", "Progress", "Started", "Details"
        )
        self.running_table.zebra_stripes = True

    # Set up finished workers table
    if self.finished_table:
        self.finished_table.add_columns(
            "ID", "Type", "Result", "Pipe", "Started", "Completed", "Duration", "Details"
        )
        self.finished_table.zebra_stripes = True

    # Set initial view (show finished by default)
    self._update_view_visibility()

    # Load initial data
    self.refresh_workers()

    # Don't set up periodic refresh - it was causing issues with stdout display
    # Users can click the Refresh button to update manually
def refresh_workers(self) -> None:
    """Refresh the workers data from app instance.

    Reads the running and finished worker lists from ``self.app_instance``
    (via ``get_running_workers()`` / ``get_finished_workers()`` when the app
    provides them, otherwise empty lists) and redraws both tables. Any
    exception is logged and swallowed so a failed refresh does not propagate
    to the caller.
    """
    try:
        if not self.app_instance:
            logger.warning("[workers-modal] No app instance provided")
            return

        # Get running workers from app instance
        # This assumes the app has a get_running_workers() method
        if hasattr(self.app_instance, "get_running_workers"):
            self.running_workers = self.app_instance.get_running_workers()
        else:
            self.running_workers = []

        # Get finished workers from app instance
        if hasattr(self.app_instance, "get_finished_workers"):
            self.finished_workers = self.app_instance.get_finished_workers()
            if self.finished_workers:
                logger.info(
                    f"[workers-modal-refresh] Got {len(self.finished_workers)} finished workers from app"
                )
                # Log the keys in the first worker to verify structure
                if isinstance(self.finished_workers[0], dict):
                    logger.info(
                        f"[workers-modal-refresh] First worker keys: {list(self.finished_workers[0].keys())}"
                    )
                    logger.info(
                        f"[workers-modal-refresh] First worker: {self.finished_workers[0]}"
                    )
                else:
                    logger.warning(
                        f"[workers-modal-refresh] First worker is not a dict: {type(self.finished_workers[0])}"
                    )
        else:
            self.finished_workers = []

        # Update tables
        self._update_running_table()
        self._update_finished_table()

        logger.info(
            f"[workers-modal] Refreshed: {len(self.running_workers)} running, {len(self.finished_workers)} finished"
        )
    except Exception as e:
        # Include the traceback, matching the other handlers in this modal.
        logger.error(f"[workers-modal] Error refreshing workers: {e}", exc_info=True)
def _update_view_visibility(self) -> None:
"""Toggle visibility between running and finished views."""
try:
@@ -173,7 +172,7 @@ class WorkersModal(ModalScreen):
finished_section = self.query_one("#finished-section", Vertical)
toggle_running_btn = self.query_one("#toggle-running-btn", Button)
toggle_finished_btn = self.query_one("#toggle-finished-btn", Button)
if self.show_running:
running_section.display = True
finished_section.display = False
@@ -188,48 +187,52 @@ class WorkersModal(ModalScreen):
logger.debug("[workers-modal] Switched to Finished view")
except Exception as e:
logger.error(f"[workers-modal] Error updating view visibility: {e}")
def _update_running_table(self) -> None:
"""Update the running workers table."""
try:
if not self.running_table:
logger.error("[workers-modal] Running table not initialized")
return
self.running_table.clear()
if not self.running_workers:
self.running_table.add_row("---", "---", "---", "---", "---", "---", "No workers running")
self.running_table.add_row(
"---", "---", "---", "---", "---", "---", "No workers running"
)
logger.debug(f"[workers-modal] No running workers to display")
return
logger.debug(f"[workers-modal] Updating running table with {len(self.running_workers)} workers")
logger.debug(
f"[workers-modal] Updating running table with {len(self.running_workers)} workers"
)
for idx, worker_info in enumerate(self.running_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
status = worker_info.get('status', 'running')
progress = worker_info.get('progress', '')
started = worker_info.get('started', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
worker_id = worker_info.get("id", "unknown")
worker_type = worker_info.get("type", "unknown")
status = worker_info.get("status", "running")
progress = worker_info.get("progress", "")
started = worker_info.get("started", "")
details = worker_info.get("details", "")
pipe = worker_info.get("pipe", "")
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
status = str(status) if status else 'running'
progress = str(progress) if progress else '---'
started = str(started) if started else '---'
details = str(details) if details else '---'
worker_id = str(worker_id) if worker_id else "unknown"
worker_type = str(worker_type) if worker_type else "unknown"
status = str(status) if status else "running"
progress = str(progress) if progress else "---"
started = str(started) if started else "---"
details = str(details) if details else "---"
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
progress = progress[:20]
started = started[:19]
details = details[:30]
pipe_display = pipe_display[:40]
self.running_table.add_row(
worker_id[:8],
worker_type[:15],
@@ -237,57 +240,68 @@ class WorkersModal(ModalScreen):
pipe_display,
progress,
started,
details
details,
)
if idx == 0: # Log first entry
logger.debug(f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}")
logger.debug(
f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}"
)
except Exception as row_error:
logger.error(f"[workers-modal] Error adding running row {idx}: {row_error}", exc_info=True)
logger.debug(f"[workers-modal] Updated running table with {len(self.running_workers)} workers")
logger.error(
f"[workers-modal] Error adding running row {idx}: {row_error}",
exc_info=True,
)
logger.debug(
f"[workers-modal] Updated running table with {len(self.running_workers)} workers"
)
except Exception as e:
logger.error(f"[workers-modal] Error updating running table: {e}", exc_info=True)
def _update_finished_table(self) -> None:
"""Update the finished workers table."""
try:
if not self.finished_table:
logger.error("[workers-modal] Finished table not initialized")
return
self.finished_table.clear()
if not self.finished_workers:
self.finished_table.add_row("---", "---", "---", "---", "---", "---", "---", "No finished workers")
self.finished_table.add_row(
"---", "---", "---", "---", "---", "---", "---", "No finished workers"
)
logger.debug(f"[workers-modal] No finished workers to display")
return
logger.info(f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers")
logger.info(
f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers"
)
added_count = 0
error_count = 0
for idx, worker_info in enumerate(self.finished_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
result = worker_info.get('result', 'unknown')
completed = worker_info.get('completed', '')
duration = worker_info.get('duration', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
started = worker_info.get('started', '')
worker_id = worker_info.get("id", "unknown")
worker_type = worker_info.get("type", "unknown")
result = worker_info.get("result", "unknown")
completed = worker_info.get("completed", "")
duration = worker_info.get("duration", "")
details = worker_info.get("details", "")
pipe = worker_info.get("pipe", "")
started = worker_info.get("started", "")
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
result = str(result) if result else 'unknown'
completed = str(completed) if completed else '---'
duration = str(duration) if duration else '---'
details = str(details) if details else '---'
started = str(started) if started else '---'
worker_id = str(worker_id) if worker_id else "unknown"
worker_type = str(worker_type) if worker_type else "unknown"
result = str(result) if result else "unknown"
completed = str(completed) if completed else "---"
duration = str(duration) if duration else "---"
details = str(details) if details else "---"
started = str(started) if started else "---"
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
result = result[:15]
completed = completed[:19]
@@ -295,7 +309,7 @@ class WorkersModal(ModalScreen):
duration = duration[:10]
details = details[:30]
pipe_display = pipe_display[:40]
self.finished_table.add_row(
worker_id[:8],
worker_type[:15],
@@ -304,24 +318,31 @@ class WorkersModal(ModalScreen):
started,
completed,
duration,
details
details,
)
added_count += 1
except Exception as row_error:
error_count += 1
logger.error(f"[workers-modal-update] Error adding finished row {idx}: {row_error}", exc_info=True)
logger.info(f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})")
logger.debug(f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}")
logger.error(
f"[workers-modal-update] Error adding finished row {idx}: {row_error}",
exc_info=True,
)
logger.info(
f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})"
)
logger.debug(
f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}"
)
except Exception as e:
logger.error(f"[workers-modal] Error updating finished table: {e}", exc_info=True)
def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
"""Handle row highlight in tables - display stdout."""
try:
logger.info(f"[workers-modal] Row highlighted, cursor_row: {event.cursor_row}")
# Get the selected worker from the correct table
workers_list = None
if event.control == self.running_table:
@@ -329,33 +350,39 @@ class WorkersModal(ModalScreen):
logger.debug(f"[workers-modal] Highlighted in running table")
elif event.control == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}")
logger.debug(
f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}"
)
else:
logger.warning(f"[workers-modal] Unknown table: {event.control}")
return
# Get the worker at this row
if workers_list and 0 <= event.cursor_row < len(workers_list):
worker = workers_list[event.cursor_row]
worker_id = worker.get('id', '')
worker_id = worker.get("id", "")
logger.info(f"[workers-modal] Highlighted worker: {worker_id}")
if worker_id:
self.selected_worker_id = worker_id
# Display the stdout
self._update_stdout_display(worker_id, worker)
else:
logger.warning(f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}")
logger.warning(
f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}"
)
except Exception as e:
logger.error(f"[workers-modal] Error handling row highlight: {e}", exc_info=True)
def on_data_table_cell_highlighted(self, event: DataTable.CellHighlighted) -> None:
"""Handle cell highlight in tables - display stdout (backup for row selection)."""
try:
# CellHighlighted has coordinate (row, column) not cursor_row
cursor_row = event.coordinate.row
logger.debug(f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}")
logger.debug(
f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}"
)
# Get the selected worker from the correct table
workers_list = None
if event.data_table == self.running_table:
@@ -363,15 +390,17 @@ class WorkersModal(ModalScreen):
logger.debug(f"[workers-modal] Cell highlighted in running table")
elif event.data_table == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}")
logger.debug(
f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}"
)
else:
return
# Get the worker at this row
if workers_list and 0 <= cursor_row < len(workers_list):
worker = workers_list[cursor_row]
worker_id = worker.get('id', '')
worker_id = worker.get("id", "")
if worker_id and worker_id != self.selected_worker_id:
logger.info(f"[workers-modal] Cell-highlighted worker: {worker_id}")
self.selected_worker_id = worker_id
@@ -379,8 +408,10 @@ class WorkersModal(ModalScreen):
self._update_stdout_display(worker_id, worker)
except Exception as e:
logger.debug(f"[workers-modal] Error handling cell highlight: {e}")
def _update_stdout_display(self, worker_id: str, worker: Optional[Dict[str, Any]] = None) -> None:
def _update_stdout_display(
self, worker_id: str, worker: Optional[Dict[str, Any]] = None
) -> None:
"""Update the stdout textarea with logs from the selected worker."""
try:
if not self.stdout_display:
@@ -400,7 +431,9 @@ class WorkersModal(ModalScreen):
logs_body = (stdout_text or "").strip()
sections.append("Logs:\n" + (logs_body if logs_body else "(no logs recorded)"))
combined_text = "\n\n".join(sections)
logger.debug(f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})")
logger.debug(
f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})"
)
self.stdout_display.text = combined_text
if len(combined_text) > 10:
try:
@@ -410,37 +443,37 @@ class WorkersModal(ModalScreen):
logger.info(f"[workers-modal] Updated stdout display successfully")
except Exception as e:
logger.error(f"[workers-modal] Error updating stdout display: {e}", exc_info=True)
def _locate_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
for worker in self.running_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
if isinstance(worker, dict) and worker.get("id") == worker_id:
return worker
for worker in self.finished_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
if isinstance(worker, dict) and worker.get("id") == worker_id:
return worker
return None
def _resolve_worker_stdout(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('stdout'):
return worker.get('stdout', '') or ''
manager = getattr(self.app_instance, 'worker_manager', None)
if worker and worker.get("stdout"):
return worker.get("stdout", "") or ""
manager = getattr(self.app_instance, "worker_manager", None)
if manager:
try:
return manager.get_stdout(worker_id) or ''
return manager.get_stdout(worker_id) or ""
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch stdout for {worker_id}: {exc}")
return ''
return ""
def _resolve_worker_pipe(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('pipe'):
return str(worker.get('pipe'))
if worker and worker.get("pipe"):
return str(worker.get("pipe"))
record = self._fetch_worker_record(worker_id)
if record and record.get('pipe'):
return str(record.get('pipe'))
return ''
if record and record.get("pipe"):
return str(record.get("pipe"))
return ""
def _fetch_worker_record(self, worker_id: str) -> Optional[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
manager = getattr(self.app_instance, "worker_manager", None)
if not manager:
return None
try:
@@ -448,9 +481,9 @@ class WorkersModal(ModalScreen):
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch worker record {worker_id}: {exc}")
return None
def _get_worker_events(self, worker_id: str, limit: int = 250) -> List[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
manager = getattr(self.app_instance, "worker_manager", None)
if not manager:
return []
try:
@@ -458,28 +491,28 @@ class WorkersModal(ModalScreen):
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch worker events {worker_id}: {exc}")
return []
def _format_worker_timeline(self, events: List[Dict[str, Any]]) -> str:
if not events:
return ""
lines: List[str] = []
for event in events:
timestamp = self._format_event_timestamp(event.get('created_at'))
label = (event.get('event_type') or '').upper() or 'EVENT'
channel = (event.get('channel') or '').upper()
timestamp = self._format_event_timestamp(event.get("created_at"))
label = (event.get("event_type") or "").upper() or "EVENT"
channel = (event.get("channel") or "").upper()
if channel and channel not in label:
label = f"{label}/{channel}"
step = event.get('step') or ''
message = event.get('message') or ''
prefix = ''
if event.get('event_type') == 'step' and step:
step = event.get("step") or ""
message = event.get("message") or ""
prefix = ""
if event.get("event_type") == "step" and step:
prefix = f"{step} :: "
elif step and step not in message:
prefix = f"{step} :: "
formatted_message = self._format_message_block(message)
lines.append(f"[{timestamp}] {label}: {prefix}{formatted_message}")
return "\n".join(lines)
def _format_event_timestamp(self, raw_timestamp: Any) -> str:
if not raw_timestamp:
return "--:--:--"
@@ -491,9 +524,9 @@ class WorkersModal(ModalScreen):
else:
time_part = text
return time_part[:8] if len(time_part) >= 8 else time_part
def _format_message_block(self, message: str) -> str:
clean = (message or '').strip()
clean = (message or "").strip()
if not clean:
return "(empty)"
lines = clean.splitlines()
@@ -502,31 +535,31 @@ class WorkersModal(ModalScreen):
head, *rest = lines
indented = "\n".join(f" {line}" for line in rest)
return f"{head}\n{indented}"
def _summarize_pipe(self, pipe_value: Any, limit: int = 40) -> str:
text = str(pipe_value or '').strip()
text = str(pipe_value or "").strip()
if not text:
return "(none)"
return text if len(text) <= limit else text[: limit - 3] + '...'
return text if len(text) <= limit else text[: limit - 3] + "..."
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
button_id = event.button.id
try:
if button_id == "toggle-running-btn":
self.show_running = True
self._update_view_visibility()
return
elif button_id == "toggle-finished-btn":
self.show_running = False
self._update_view_visibility()
return
if button_id == "running-refresh-btn":
self.refresh_workers()
elif button_id == "running-stop-btn":
# Stop selected running worker
if self.running_table and self.running_table.row_count > 0:
@@ -534,24 +567,24 @@ class WorkersModal(ModalScreen):
selected_row = self.running_table.cursor_row
if 0 <= selected_row < len(self.running_workers):
worker = self.running_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'stop_worker'):
worker_id = worker.get("id")
if self.app_instance and hasattr(self.app_instance, "stop_worker"):
self.app_instance.stop_worker(worker_id)
logger.info(f"[workers-modal] Stopped worker: {worker_id}")
self.refresh_workers()
except Exception as e:
logger.error(f"[workers-modal] Error stopping worker: {e}")
elif button_id == "running-stop-all-btn":
# Stop all running workers
if self.app_instance and hasattr(self.app_instance, 'stop_all_workers'):
if self.app_instance and hasattr(self.app_instance, "stop_all_workers"):
self.app_instance.stop_all_workers()
logger.info("[workers-modal] Stopped all workers")
self.refresh_workers()
elif button_id == "finished-refresh-btn":
self.refresh_workers()
elif button_id == "finished-clear-btn":
# Clear selected finished worker
if self.finished_table and self.finished_table.row_count > 0:
@@ -559,27 +592,29 @@ class WorkersModal(ModalScreen):
selected_row = self.finished_table.cursor_row
if 0 <= selected_row < len(self.finished_workers):
worker = self.finished_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'clear_finished_worker'):
worker_id = worker.get("id")
if self.app_instance and hasattr(
self.app_instance, "clear_finished_worker"
):
self.app_instance.clear_finished_worker(worker_id)
logger.info(f"[workers-modal] Cleared worker: {worker_id}")
self.refresh_workers()
except Exception as e:
logger.error(f"[workers-modal] Error clearing worker: {e}")
elif button_id == "finished-clear-all-btn":
# Clear all finished workers
if self.app_instance and hasattr(self.app_instance, 'clear_all_finished_workers'):
if self.app_instance and hasattr(self.app_instance, "clear_all_finished_workers"):
self.app_instance.clear_all_finished_workers()
logger.info("[workers-modal] Cleared all finished workers")
self.refresh_workers()
elif button_id == "close-btn":
self.dismiss(None)
except Exception as e:
logger.error(f"[workers-modal] Error in on_button_pressed: {e}")
def action_cancel(self) -> None:
    """Action for Escape key - close the modal, dismissing it with ``None``."""
    self.dismiss(None)
+23 -5
View File
@@ -3,6 +3,7 @@
The TUI is a frontend to the CLI, so it must use the same pipeline executor
implementation as the CLI (`CLI.PipelineExecutor`).
"""
from __future__ import annotations
import contextlib
@@ -149,7 +150,10 @@ class PipelineRunner:
try:
with capture_rich_output(stdout=stdout_buffer, stderr=stderr_buffer):
with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(stderr_buffer):
with (
contextlib.redirect_stdout(stdout_buffer),
contextlib.redirect_stderr(stderr_buffer),
):
if on_log:
on_log("Executing pipeline via CLI executor...")
self._executor.execute_tokens(list(tokens))
@@ -166,7 +170,11 @@ class PipelineRunner:
# Pull the canonical state out of pipeline context.
table = None
try:
table = ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
table = (
ctx.get_display_table()
or ctx.get_current_stage_table()
or ctx.get_last_result_table()
)
except Exception:
table = None
@@ -259,7 +267,11 @@ class PipelineRunner:
try:
hist = list(getattr(ctx, "_RESULT_TABLE_HISTORY", []) or [])
snap["_RESULT_TABLE_HISTORY"] = [
(t, (items.copy() if isinstance(items, list) else list(items) if items else []), subj)
(
t,
(items.copy() if isinstance(items, list) else list(items) if items else []),
subj,
)
for (t, items, subj) in hist
if isinstance((t, items, subj), tuple)
]
@@ -269,7 +281,11 @@ class PipelineRunner:
try:
fwd = list(getattr(ctx, "_RESULT_TABLE_FORWARD", []) or [])
snap["_RESULT_TABLE_FORWARD"] = [
(t, (items.copy() if isinstance(items, list) else list(items) if items else []), subj)
(
t,
(items.copy() if isinstance(items, list) else list(items) if items else []),
subj,
)
for (t, items, subj) in fwd
if isinstance((t, items, subj), tuple)
]
@@ -278,7 +294,9 @@ class PipelineRunner:
try:
tail = list(getattr(ctx, "_PENDING_PIPELINE_TAIL", []) or [])
snap["_PENDING_PIPELINE_TAIL"] = [list(stage) for stage in tail if isinstance(stage, list)]
snap["_PENDING_PIPELINE_TAIL"] = [
list(stage) for stage in tail if isinstance(stage, list)
]
except Exception:
pass
+47 -9
View File
@@ -1,4 +1,5 @@
"""Modern Textual UI for driving Medeia-Macina pipelines."""
from __future__ import annotations
import json
@@ -13,7 +14,18 @@ from textual.binding import Binding
from textual.events import Key
from textual.containers import Container, Horizontal, Vertical
from textual.screen import ModalScreen
from textual.widgets import Button, DataTable, Footer, Header, Input, Label, OptionList, Select, Static, TextArea
from textual.widgets import (
Button,
DataTable,
Footer,
Header,
Input,
Label,
OptionList,
Select,
Static,
TextArea,
)
from textual.widgets.option_list import Option
BASE_DIR = Path(__file__).resolve().parent
@@ -174,7 +186,9 @@ class TagEditorPopup(ModalScreen[None]):
self._save_tags_background(to_add, to_del, desired)
@work(thread=True)
def _save_tags_background(self, to_add: List[str], to_del: List[str], desired: List[str]) -> None:
def _save_tags_background(
self, to_add: List[str], to_del: List[str], desired: List[str]
) -> None:
app = self.app # PipelineHubApp
try:
runner: PipelineRunner = getattr(app, "executor")
@@ -188,14 +202,26 @@ class TagEditorPopup(ModalScreen[None]):
del_cmd = f"@1 | delete-tag -store {store_tok}{query_chunk} {del_args}"
del_res = runner.run_pipeline(del_cmd, seeds=self._seeds, isolate=True)
if not getattr(del_res, "success", False):
failures.append(str(getattr(del_res, "error", "") or getattr(del_res, "stderr", "") or "delete-tag failed").strip())
failures.append(
str(
getattr(del_res, "error", "")
or getattr(del_res, "stderr", "")
or "delete-tag failed"
).strip()
)
if to_add:
add_args = " ".join(json.dumps(t) for t in to_add)
add_cmd = f"@1 | add-tag -store {store_tok}{query_chunk} {add_args}"
add_res = runner.run_pipeline(add_cmd, seeds=self._seeds, isolate=True)
if not getattr(add_res, "success", False):
failures.append(str(getattr(add_res, "error", "") or getattr(add_res, "stderr", "") or "add-tag failed").strip())
failures.append(
str(
getattr(add_res, "error", "")
or getattr(add_res, "stderr", "")
or "add-tag failed"
).strip()
)
if failures:
msg = failures[0]
@@ -401,7 +427,9 @@ class PipelineHubApp(App):
if not suggestion:
return
self.command_input.value = self._apply_suggestion_to_text(str(self.command_input.value or ""), suggestion)
self.command_input.value = self._apply_suggestion_to_text(
str(self.command_input.value or ""), suggestion
)
if self.suggestion_list:
self.suggestion_list.display = False
event.prevent_default()
@@ -496,7 +524,11 @@ class PipelineHubApp(App):
# Identify first stage command name for conservative auto-augmentation.
first_stage_cmd = ""
try:
first_stage_cmd = str(stages[0].split()[0]).replace("_", "-").strip().lower() if stages[0].split() else ""
first_stage_cmd = (
str(stages[0].split()[0]).replace("_", "-").strip().lower()
if stages[0].split()
else ""
)
except Exception:
first_stage_cmd = ""
@@ -720,7 +752,9 @@ class PipelineHubApp(App):
item: Any = None
# Prefer mapping displayed table row -> source item.
if self.current_result_table and 0 <= index < len(getattr(self.current_result_table, "rows", []) or []):
if self.current_result_table and 0 <= index < len(
getattr(self.current_result_table, "rows", []) or []
):
row = self.current_result_table.rows[index]
src_idx = getattr(row, "source_index", None)
if isinstance(src_idx, int) and 0 <= src_idx < len(self.result_items):
@@ -782,7 +816,9 @@ class PipelineHubApp(App):
return
text = ""
idx = int(getattr(self, "_selected_row_index", 0) or 0)
if self.current_result_table and 0 <= idx < len(getattr(self.current_result_table, "rows", []) or []):
if self.current_result_table and 0 <= idx < len(
getattr(self.current_result_table, "rows", []) or []
):
row = self.current_result_table.rows[idx]
lines = [f"{col.name}: {col.value}" for col in getattr(row, "columns", []) or []]
text = "\n".join(lines)
@@ -874,7 +910,9 @@ class PipelineHubApp(App):
worker_id = str(worker.get("worker_id") or worker.get("id") or "?")[:8]
worker_type = str(worker.get("worker_type") or worker.get("type") or "?")
status = str(worker.get("status") or worker.get("result") or "running")
details = worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
details = (
worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
)
self.worker_table.add_row(worker_id, worker_type, status, str(details)[:80])
+12 -11
View File
@@ -34,7 +34,7 @@ def _split_pipeline_stages(text: str) -> list[str]:
escaped = True
continue
if ch in ("\"", "'"):
if ch in ('"', "'"):
if quote is None:
quote = ch
elif quote == ch:
@@ -148,7 +148,9 @@ def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxError
parsed_q = parse_query(str(query_val))
q_hash = get_field(parsed_q, "hash") or get_field(parsed_q, "sha256")
q_store = get_field(parsed_q, "store")
has_store_hash_in_query = bool(str(q_hash or "").strip() and str(q_store or "").strip())
has_store_hash_in_query = bool(
str(q_hash or "").strip() and str(q_store or "").strip()
)
except Exception:
has_store_hash_in_query = False
@@ -157,7 +159,7 @@ def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxError
return SyntaxErrorDetail(
"Pipeline error: 'add-note' must come after 'add-file' when used with piped input. "
"Move 'add-note' after 'add-file', or call it with explicit targeting: "
"add-note -query \"store:<store> hash:<sha256> title:<title>,text:<text>\"."
'add-note -query "store:<store> hash:<sha256> title:<title>,text:<text>".'
)
return None
@@ -209,7 +211,7 @@ def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
last_pipe_outside_quotes = idx
if in_double:
return SyntaxErrorDetail('Syntax error: missing closing ' + '"' + '.', expected='"')
return SyntaxErrorDetail("Syntax error: missing closing " + '"' + ".", expected='"')
if in_single:
return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'")
@@ -242,7 +244,9 @@ def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
continue
if ch == "|" and not in_single and not in_double:
if not seen_nonspace_since_pipe:
return SyntaxErrorDetail("Syntax error: empty pipeline stage (use a command between '|').")
return SyntaxErrorDetail(
"Syntax error: empty pipeline stage (use a command between '|')."
)
seen_nonspace_since_pipe = False
continue
if not in_single and not in_double and not ch.isspace():
@@ -310,7 +314,9 @@ def parse_query(query: str) -> Dict[str, Any]:
return result
def get_field(
    parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None
) -> Optional[str]:
    """Get a field value from a parsed query.

    Args:
        parsed_query: Mapping whose "fields" entry maps field names to values.
        field_name: Field to look up; it is lower-cased before the lookup, and
            a falsy name is looked up as the empty string.
        default: Value returned when the field is absent.

    Returns:
        The stored field value, or ``default`` when it is absent.
    """
    # "fields" may be missing or explicitly None; treat both as "no fields"
    # instead of raising AttributeError on None.get(...).
    fields = parsed_query.get("fields") or {}
    return fields.get((field_name or "").lower(), default)
@@ -320,8 +326,3 @@ def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free-text portion of a parsed query."""
return str(parsed_query.get("text", "") or "")
+7 -4
View File
@@ -11,7 +11,7 @@ REGISTRY: Dict[str, Cmdlet] = {}
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace('_', '-').lower().strip()
return str(name or "").replace("_", "-").lower().strip()
def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
@@ -35,8 +35,10 @@ def register(names: Iterable[str]):
@register(["add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
return register_callable(names, fn)
return _wrap
@@ -47,12 +49,11 @@ def get(cmd_name: str) -> Cmdlet | None:
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
# Cmdlets self-register when instantiated via their __init__ method
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if not (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"
):
continue
@@ -67,12 +68,14 @@ for filename in os.listdir(cmdlet_dir):
_import_module(f".{mod_name}", __name__)
except Exception as e:
import sys
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlets
try:
from cmdnat import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
+1957 -1910
View File
File diff suppressed because it is too large Load Diff
+427 -179
View File
File diff suppressed because it is too large Load Diff
+28 -12
View File
@@ -27,11 +27,10 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note (-query \"title:<title>,text:<text>[,store:<store>][,hash:<sha256>]\") [ -store <store> | <piped> ]",
usage='add-note (-query "title:<title>,text:<text>[,store:<store>][,hash:<sha256>]") [ -store <store> | <piped> ]',
alias=[""],
arg=[
SharedArgs.STORE,
QueryArg(
"hash",
key="hash",
@@ -116,11 +115,13 @@ class Add_Note(Cmdlet):
# Fallback: best-effort regex.
name_match = re.search(r"\btitle\s*:\s*([^,\s]+)", normalized, flags=re.IGNORECASE)
text_match = re.search(r"\btext\s*:\s*(.+)$", normalized, flags=re.IGNORECASE)
note_name = (name_match.group(1).strip() if name_match else "")
note_text = (text_match.group(1).strip() if text_match else "")
note_name = name_match.group(1).strip() if name_match else ""
note_text = text_match.group(1).strip() if text_match else ""
return (note_name or None, note_text or None)
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -148,11 +149,17 @@ class Add_Note(Cmdlet):
hash_override = normalize_hash(parsed.get("hash"))
note_name, note_text = self._parse_note_query(str(parsed.get("query") or ""))
if not note_name or not note_text:
log("[add_note] Error: -query must include title:<title> and text:<text>", file=sys.stderr)
log(
"[add_note] Error: -query must include title:<title> and text:<text>",
file=sys.stderr,
)
return 1
if hash_override and not store_override:
log("[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>", file=sys.stderr)
log(
"[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>",
file=sys.stderr,
)
return 1
explicit_target = bool(hash_override and store_override)
@@ -166,7 +173,9 @@ class Add_Note(Cmdlet):
backend = store_registry[str(store_override)]
ok = bool(backend.set_note(str(hash_override), note_name, note_text, config=config))
if ok:
ctx.print_if_visible(f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr
)
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
return 1
@@ -180,7 +189,10 @@ class Add_Note(Cmdlet):
# Allow standalone use (no piped input) and enable piping the target forward.
results = [{"store": str(store_override), "hash": hash_override}]
else:
log("[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query \"store:<store> hash:<sha256> ...\")", file=sys.stderr)
log(
'[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query "store:<store> hash:<sha256> ...")',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -242,10 +254,15 @@ class Add_Note(Cmdlet):
try:
ok = bool(bulk_fn(list(ops), config=config))
wrote_any = wrote_any or ok or True
ctx.print_if_visible(f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr
)
continue
except Exception as exc:
log(f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", file=sys.stderr)
log(
f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back",
file=sys.stderr,
)
# Fallback: per-item writes
for file_hash, name, text in ops:
@@ -260,4 +277,3 @@ class Add_Note(Cmdlet):
CMDLET = Add_Note()
+234 -87
View File
@@ -29,12 +29,28 @@ CMDLET = Cmdlet(
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg(
"path",
type="string",
description="Specify the local file path (if not piping a result).",
),
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
CmdletArg(
"-king",
type="string",
description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)",
),
CmdletArg(
"-alt",
type="string",
description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)",
),
CmdletArg(
"-type",
type="string",
description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')",
),
],
detail=[
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
@@ -54,7 +70,7 @@ def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
if len(normalized) == 64 and all(c in "0123456789abcdef" for c in normalized):
return normalized
return None
@@ -71,9 +87,9 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)<HASH>
pattern = r'hash\((\w+)\)<?([a-fA-F0-9]{64})>?'
pattern = r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?"
matches = re.findall(pattern, tag_value)
if matches:
@@ -118,7 +134,11 @@ def _apply_relationships_from_tags(
- Treat the first hash (king) as the king.
- Store directional alt -> king relationships (no reverse edge).
"""
rel_tags = [t for t in relationship_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:")]
rel_tags = [
t
for t in relationship_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if not rel_tags:
return 0
@@ -166,7 +186,9 @@ def _apply_relationships_from_tags(
# For local DB we treat all non-king hashes as alts.
alt_hashes: list[str] = []
for bucket in ("alt", "related"):
alt_hashes.extend([h for h in (rels.get(bucket) or []) if isinstance(h, str)])
alt_hashes.extend(
[h for h in (rels.get(bucket) or []) if isinstance(h, str)]
)
for alt in alt_hashes:
alt_norm = _normalise_hash_hex(alt)
@@ -191,7 +213,7 @@ def _parse_at_selection(token: str) -> Optional[list[int]]:
if not isinstance(token, str):
return None
t = token.strip()
if not t.startswith('@'):
if not t.startswith("@"):
return None
if t == "@*":
return [] # special sentinel: caller interprets as "all"
@@ -293,27 +315,28 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""
if not king_arg:
return None
# Check if it's already a valid hash
normalized = _normalise_hash_hex(king_arg)
if normalized:
return normalized
# Try to resolve as @ selection from pipeline context
if king_arg.startswith('@'):
if king_arg.startswith("@"):
selected = _resolve_items_from_at(king_arg)
if not selected:
log(f"Cannot resolve {king_arg}: no selection context", file=sys.stderr)
return None
if len(selected) != 1:
log(f"{king_arg} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_arg} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return None
item = selected[0]
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash")
)
if item_hash:
@@ -323,11 +346,16 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
log(f"Item {king_arg} has no hash information", file=sys.stderr)
return None
return None
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -356,11 +384,32 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
subj_hashes: list[str] = []
subj_paths: list[str] = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v]
subj_hashes = [
norm(v)
for v in [
subject.get("hydrus_hash"),
subject.get("hash"),
subject.get("hash_hex"),
subject.get("file_hash"),
]
if v
]
subj_paths = [
norm(v)
for v in [subject.get("file_path"), subject.get("path"), subject.get("target")]
if v
]
else:
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)]
subj_hashes = [
norm(getattr(subject, f, None))
for f in ("hydrus_hash", "hash", "hash_hex", "file_hash")
if getattr(subject, f, None)
]
subj_paths = [
norm(getattr(subject, f, None))
for f in ("file_path", "path", "target")
if getattr(subject, f, None)
]
is_match = False
if target_hashes and any(h in subj_hashes for h in target_hashes):
@@ -380,7 +429,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
Two modes of operation:
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
@@ -391,7 +440,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(_args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
@@ -420,7 +469,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if alt_arg:
alt_text = str(alt_arg).strip()
resolved_alt_items: list[Any] = []
if alt_text.startswith('@'):
if alt_text.startswith("@"):
selected = _resolve_items_from_at(alt_text)
if not selected:
log(f"Failed to resolve -alt {alt_text}: no selection context", file=sys.stderr)
@@ -431,7 +480,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
parts = [p.strip() for p in alt_text.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -alt value (expected @ selection or 64-hex sha256 hash list)", file=sys.stderr)
log(
"Invalid -alt value (expected @ selection or 64-hex sha256 hash list)",
file=sys.stderr,
)
return 1
if not override_store:
log("-store is required when using -alt with a raw hash list", file=sys.stderr)
@@ -445,11 +497,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
items_to_process = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
return 1
# If no items from pipeline, just process the -path arg
if not items_to_process and arg_path:
items_to_process = [{"file_path": arg_path}]
@@ -459,13 +511,16 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
king_store: Optional[str] = None
if king_arg:
king_text = str(king_arg).strip()
if king_text.startswith('@'):
if king_text.startswith("@"):
selected = _resolve_items_from_at(king_text)
if not selected:
log(f"Cannot resolve {king_text}: no selection context", file=sys.stderr)
return 1
if len(selected) != 1:
log(f"{king_text} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_text} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return 1
king_hash, king_store = _extract_hash_and_store(selected[0])
if not king_hash:
@@ -490,18 +545,27 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store", file=sys.stderr)
log(
"Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store",
file=sys.stderr,
)
return 1
# Enforce same-store relationships when store context is available.
if king_store and store_name and str(king_store) != str(store_name):
log(f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'",
file=sys.stderr,
)
return 1
if store_name:
for item in items_to_process:
s = get_field(item, "store")
if s and str(s) != str(store_name):
log(f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'",
file=sys.stderr,
)
return 1
# Resolve backend for store/hash operations
@@ -564,7 +628,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
sidecar_path = find_sidecar(arg_path)
if sidecar_path is not None and sidecar_path.exists():
_, tags, _ = read_sidecar(sidecar_path)
relationship_tags = [t for t in (tags or []) if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t
for t in (tags or [])
if isinstance(t, str) and t.lower().startswith("relationship:")
]
if relationship_tags:
code = _apply_relationships_from_tags(
relationship_tags,
@@ -587,7 +655,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
else:
tags_val = getattr(item, "tag", None)
if isinstance(tags_val, list):
rel_tags_from_pipe.extend([t for t in tags_val if isinstance(t, str) and t.lower().startswith("relationship:")])
rel_tags_from_pipe.extend(
[
t
for t in tags_val
if isinstance(t, str) and t.lower().startswith("relationship:")
]
)
elif isinstance(tags_val, str) and tags_val.lower().startswith("relationship:"):
rel_tags_from_pipe.append(tags_val)
@@ -613,7 +687,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
@@ -622,19 +699,26 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
# directional alt -> king by default for local DB
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, first_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, first_hash, str(rel_type), bidirectional=bidirectional
)
return 0
# Mode 2: explicit king
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, king_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, king_hash, str(rel_type), bidirectional=bidirectional
)
return 0
except Exception as exc:
log(f"Failed to set store relationships: {exc}", file=sys.stderr)
@@ -648,7 +732,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Verify hashes exist in this Hydrus backend to prevent cross-store edges.
if king_hash and (not _hydrus_hash_exists(hydrus_client, king_hash)):
log(f"Cross-store relationship blocked: king hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
# Mode 1: first is king
@@ -657,19 +744,28 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
if not first_hash:
first_hash = h
if not _hydrus_hash_exists(hydrus_client, first_hash):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
continue
if h != first_hash:
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, first_hash, str(rel_type))
return 0
@@ -678,12 +774,18 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, king_hash, str(rel_type))
return 0
@@ -693,18 +795,19 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract hash and path from current item
file_hash = None
file_path_from_result = None
if isinstance(item, dict):
file_hash = item.get("hash_hex") or item.get("hash")
file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
else:
file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)
# Legacy LOCAL STORAGE MODE: Handle relationships for local files
# (kept for -path sidecar workflows; store/hash mode above is preferred)
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
use_local_storage = bool(local_storage_path)
local_storage_root: Optional[Path] = None
@@ -743,13 +846,22 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
king_file_path = opt.db.search_hash(normalized_king)
if not king_file_path:
log(f"King hash not found in local DB: {king_hash}", file=sys.stderr)
log(
f"King hash not found in local DB: {king_hash}", file=sys.stderr
)
return 1
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, king_file_path, rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
opt.db.set_relationship(
file_path_obj, king_file_path, rel_type, bidirectional=bidirectional
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_file_path), config
)
else:
# Original behavior: first becomes king, rest become alts
try:
@@ -760,16 +872,29 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
log(
f"Established king file: {file_path_obj.name}",
file=sys.stderr,
)
continue
except Exception:
pass
if king_path and king_path != str(file_path_obj):
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, Path(king_path), rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
opt.db.set_relationship(
file_path_obj,
Path(king_path),
rel_type,
bidirectional=bidirectional,
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_path), config
)
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
@@ -781,16 +906,21 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# If explicit -king provided, use it
if king_hash:
try:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
str(file_path_from_result) if file_path_from_result is not None else None,
king_hash,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, king_hash, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
@@ -800,7 +930,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
existing_king = ctx.load_value("relationship_king")
except Exception:
existing_king = None
# If this is the first item, make it the king
if not existing_king:
try:
@@ -809,34 +939,48 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue # Move to next item
except Exception:
pass
# If we already have a king and this is a different hash, link them
if existing_king and existing_king != file_hash:
try:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
(
str(file_path_from_result)
if file_path_from_result is not None
else None
),
existing_king,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, existing_king, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
# If we get here, we didn't have a usable local path and Hydrus isn't available/usable.
return 0
# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
log(
"Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship",
file=sys.stderr,
)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
media_path = (
arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
)
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
# Validate local file
if str(media_path).lower().startswith(("http://", "https://")):
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
@@ -851,7 +995,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if hydrus_client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
@@ -861,7 +1005,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if sidecar_path is None:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
return 1
try:
_, tags, _ = read_sidecar(sidecar_path)
except Exception as exc:
@@ -869,63 +1013,68 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")
]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
return 0 # Not an error, just nothing to do
# Get the file hash from result (should have been set by add-file)
file_hash = getattr(result, "hash_hex", None)
if not file_hash:
log("File hash not available (run add-file first)", file=sys.stderr)
return 1
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Parse relationships from tags and apply them
success_count = 0
error_count = 0
for rel_tag in relationship_tags:
try:
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
# Parse relationships
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
# Set the relationships in Hydrus
for rel_type, related_hashes in rels.items():
if not related_hashes:
continue
for related_hash in related_hashes:
# Don't set relationship between hash and itself
if file_hash == related_hash:
continue
try:
hydrus_client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
file=sys.stderr,
)
success_count += 1
except Exception as exc:
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
error_count += 1
except Exception as exc:
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
log(
f"Successfully set {success_count} relationship(s) for {media_path.name}",
file=sys.stderr,
)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
@@ -940,5 +1089,3 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET.exec = _run
CMDLET.alias = ["add-rel"]
CMDLET.register()
+162 -52
View File
@@ -118,14 +118,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
name_lower = raw_name.lower()
is_last = idx == (len(matches) - 1)
if is_last:
parts.append(fr"(?P<{raw_name}>.+)")
parts.append(rf"(?P<{raw_name}>.+)")
else:
# Heuristic: common numeric fields should capture full digit runs.
# This avoids ambiguous splits like track='2', title='3 ...'.
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
parts.append(fr"(?P<{raw_name}>\d+)")
if name_lower in {
"disk",
"disc",
"cd",
"track",
"trk",
"episode",
"ep",
"season",
"year",
}:
parts.append(rf"(?P<{raw_name}>\d+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
parts.append(rf"(?P<{raw_name}>.+?)")
last_end = m.end()
@@ -159,7 +169,9 @@ def _extract_tags_from_title(title_text: str, template: str) -> List[str]:
return out
def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[str]] = None) -> List[str]:
def _get_title_candidates_for_extraction(
res: Any, existing_tags: Optional[List[str]] = None
) -> List[str]:
"""Return a list of possible title strings in priority order."""
candidates: List[str] = []
@@ -197,7 +209,9 @@ def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[
return candidates
def _extract_tags_from_title_candidates(candidates: List[str], template: str) -> tuple[List[str], Optional[str]]:
def _extract_tags_from_title_candidates(
candidates: List[str], template: str
) -> tuple[List[str], Optional[str]]:
"""Try candidates in order; return (tags, matched_candidate)."""
for c in candidates:
@@ -207,7 +221,9 @@ def _extract_tags_from_title_candidates(candidates: List[str], template: str) ->
return [], None
def _try_compile_extract_template(template: Optional[str]) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
def _try_compile_extract_template(
template: Optional[str],
) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
"""Compile template for debug; return (pattern, error_message)."""
if template is None:
return None, None
@@ -387,7 +403,13 @@ def _refresh_result_table_title(
pass
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_tag_view(
res: Any,
target_hash: Optional[str],
store_name: Optional[str],
target_path: Optional[str],
config: Dict[str, Any],
) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -421,7 +443,6 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
pass
class Add_Tag(Cmdlet):
"""Class-based add-tag cmdlet with Cmdlet metadata inheritance."""
@@ -429,16 +450,42 @@ class Add_Tag(Cmdlet):
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-query \"hash:<sha256>\"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
usage='add-tag -store <store> [-query "hash:<sha256>"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]',
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
CmdletArg(
"tag",
type="string",
required=False,
description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.",
variadic=True,
),
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("-extract", type="string", description="Extract tags from the item's title using a simple template with (field) placeholders. Example: -extract \"(artist) - (album) - (disk)-(track) (title)\" will add artist:, album:, disk:, track:, title: tags."),
CmdletArg("--extract-debug", type="flag", description="Print debug info for -extract matching (matched title source and extracted tags)."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
CmdletArg(
"-extract",
type="string",
description='Extract tags from the item\'s title using a simple template with (field) placeholders. Example: -extract "(artist) - (album) - (disk)-(track) (title)" will add artist:, album:, disk:, track:, title: tags.',
),
CmdletArg(
"--extract-debug",
type="flag",
description="Print debug info for -extract matching (matched title source and extracted tags).",
),
CmdletArg(
"-duplicate",
type="string",
description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)",
),
CmdletArg(
"-list",
type="string",
description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult).",
),
CmdletArg(
"--all",
type="flag",
description="Include temporary files in tagging (by default, only tag non-temporary files).",
),
],
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
@@ -446,7 +493,7 @@ class Add_Tag(Cmdlet):
"- If -query is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
'- tag can also reference lists with curly braces: add-tag {philosophy} "other:tag"',
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
@@ -484,7 +531,9 @@ class Add_Tag(Cmdlet):
# @N | download-media | add-tag ... | add-file ...
store_override = parsed.get("store")
stage_ctx = ctx.get_stage_context()
has_downstream = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
has_downstream = bool(
stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)
)
include_temp = bool(parsed.get("all", False))
if has_downstream and not include_temp and not store_override:
@@ -498,7 +547,10 @@ class Add_Tag(Cmdlet):
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
log(
"No valid files to tag (all results were temporary; use --all to include temporary files)",
file=sys.stderr,
)
return 1
# Get tag from arguments (or fallback to pipeline payload)
@@ -512,13 +564,13 @@ class Add_Tag(Cmdlet):
if not raw_tag and results and not extract_template:
first = results[0]
payload_tag = None
# Try multiple tag lookup strategies in order
tag_lookups = [
lambda x: getattr(x, "tag", None),
lambda x: x.get("tag") if isinstance(x, dict) else None,
]
for lookup in tag_lookups:
try:
payload_tag = lookup(first)
@@ -526,7 +578,7 @@ class Add_Tag(Cmdlet):
break
except (AttributeError, TypeError, KeyError):
continue
if payload_tag:
if isinstance(payload_tag, str):
raw_tag = [payload_tag]
@@ -536,7 +588,7 @@ class Add_Tag(Cmdlet):
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
for l in list_arg.split(","):
l = l.strip()
if l:
raw_tag.append(f"{{{l}}}")
@@ -590,46 +642,70 @@ class Add_Tag(Cmdlet):
# treat add-tag as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
local_mode_requested = (
(not store_name_str)
or (store_name_str.upper() == "PATH")
or (store_name_str.lower() == "local")
)
is_known_backend = bool(store_name_str) and store_registry.is_available(
store_name_str
)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tag_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
existing_lower = {
t.lower() for t in existing_tag_list if isinstance(t, str)
}
item_tag_to_add = list(tag_to_add)
if extract_template:
candidates = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted, matched = _extract_tags_from_title_candidates(candidates, extract_template)
candidates = _get_title_candidates_for_extraction(
res, existing_tag_list
)
extracted, matched = _extract_tags_from_title_candidates(
candidates, extract_template
)
if extracted:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched!r} -> {extracted}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched!r} -> {extracted}",
file=sys.stderr,
)
for new_tag in extracted:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview = (
extract_debug_rx.pattern
if extract_debug_rx
else "<uncompiled>"
)
cand_preview = "; ".join([repr(c) for c in candidates[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
item_tag_to_add = collapse_namespace_tag(
item_tag_to_add, "title", prefer="last"
)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -654,7 +730,10 @@ class Add_Tag(Cmdlet):
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
if (
t.lower().startswith(ns_prefix)
and t.lower() != new_tag.lower()
):
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
@@ -664,7 +743,9 @@ class Add_Tag(Cmdlet):
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tag_list = [t for t in existing_tag_list if t not in removed_namespace_tag]
updated_tag_list = [
t for t in existing_tag_list if t not in removed_namespace_tag
]
updated_tag_list.extend(actual_tag_to_add)
_set_item_tags(res, updated_tag_list)
@@ -672,7 +753,9 @@ class Add_Tag(Cmdlet):
_apply_title_to_result(res, final_title)
total_added += len(actual_tag_to_add)
total_modified += 1 if (removed_namespace_tag or actual_tag_to_add) else 0
total_modified += (
1 if (removed_namespace_tag or actual_tag_to_add) else 0
)
ctx.emit(res)
continue
@@ -680,14 +763,22 @@ class Add_Tag(Cmdlet):
pass
if local_mode_requested:
log("[add_tag] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
log(
"[add_tag] Error: Missing usable local path for tagging (or provide -store)",
file=sys.stderr,
)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
log(
f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}",
file=sys.stderr,
)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
resolved_hash = (
normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
@@ -700,7 +791,10 @@ class Add_Tag(Cmdlet):
resolved_hash = None
if not resolved_hash:
log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
log(
"[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping",
file=sys.stderr,
)
ctx.emit(res)
continue
@@ -724,34 +818,44 @@ class Add_Tag(Cmdlet):
if extract_template:
candidates2 = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted2, matched2 = _extract_tags_from_title_candidates(candidates2, extract_template)
extracted2, matched2 = _extract_tags_from_title_candidates(
candidates2, extract_template
)
if extracted2:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched2!r} -> {extracted2}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched2!r} -> {extracted2}",
file=sys.stderr,
)
for new_tag in extracted2:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview2 = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview2 = (
extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
)
cand_preview2 = "; ".join([repr(c) for c in candidates2[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tag to new namespaces)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -812,11 +916,17 @@ class Add_Tag(Cmdlet):
)
if extract_template and extract_matched_items == 0:
log(f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)", file=sys.stderr)
log(
f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)",
file=sys.stderr,
)
elif extract_template and extract_no_match_items > 0 and extract_debug:
log(f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}", file=sys.stderr)
log(
f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}",
file=sys.stderr,
)
return 0
CMDLET = Add_Tag()
CMDLET = Add_Tag()
+28 -20
View File
@@ -29,7 +29,7 @@ class Add_Url(sh.Cmdlet):
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = sh.parse_cmdlet_args(args, self)
@@ -51,9 +51,11 @@ class Add_Url(sh.Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -61,35 +63,37 @@ class Add_Url(sh.Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (sh.get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (sh.get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
sh.get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
urls = [u.strip() for u in str(url_arg).split(",") if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
# Get backend and add url
try:
storage = Store(config)
@@ -137,23 +141,30 @@ class Add_Url(sh.Cmdlet):
raw_hash = query_hash or sh.get_field(item, "hash")
raw_store = store_override or sh.get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr
)
continue
normalized = sh.normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[add-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
# Validate backend exists (skip PATH/unknown).
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[add-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[add-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -170,7 +181,7 @@ class Add_Url(sh.Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
@@ -206,7 +217,7 @@ class Add_Url(sh.Cmdlet):
_set_item_url(result, merged)
ctx.emit(result)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
@@ -216,6 +227,3 @@ class Add_Url(sh.Cmdlet):
CMDLET = Add_Url()
+357 -331
View File
@@ -31,395 +31,421 @@ _SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
def _extract_sha256_hex(item: Any) -> str:
    """Return the item's ``hash`` field as lowercase hex, or ``""``.

    The value is read with ``dict.get`` for dicts and ``getattr`` for any
    other object. It is accepted only when it is a string that, once
    stripped, fully matches ``_SHA256_RE``. Any exception raised along the
    way is swallowed and results in ``""``.
    """
    try:
        raw = item.get("hash") if isinstance(item, dict) else getattr(item, "hash", None)
        if not isinstance(raw, str):
            return ""
        digest = raw.strip()
        if _SHA256_RE.fullmatch(digest):
            return digest.lower()
    except Exception:
        pass
    return ""
def _extract_store_name(item: Any) -> str:
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
def _extract_url(item: Any) -> str:
    """Return the item's http(s) URL, stripped, or ``""``.

    The ``url`` field is tried first and ``target`` second, both through
    ``sh.get_field``. The value is returned only when it is a string whose
    stripped, lowercased form starts with ``http://`` or ``https://``.
    Exceptions during lookup are swallowed and yield ``""``.
    """
    try:
        value = sh.get_field(item, "url") or sh.get_field(item, "target")
        if not isinstance(value, str):
            return ""
        trimmed = value.strip()
        if trimmed.lower().startswith(("http://", "https://")):
            return trimmed
    except Exception:
        pass
    return ""
def _extract_hash_from_hydrus_file_url(url: str) -> str:
    """Return the ``hash`` query value of a ``/get_files/file`` URL.

    The result is the lowercase hash when the URL path ends with
    ``/get_files/file`` and the first ``hash`` query parameter, once
    stripped, fully matches ``_SHA256_RE``. In every other case, including
    any parsing error, the result is ``""``.
    """
    try:
        parts = urlparse(str(url))
        if not (parts.path or "").endswith("/get_files/file"):
            return ""
        query = parse_qs(parts.query or "")
        candidate = (query.get("hash") or [""])[0]
        if not isinstance(candidate, str):
            return ""
        digest = candidate.strip()
        if _SHA256_RE.fullmatch(digest):
            return digest.lower()
    except Exception:
        pass
    return ""
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
"""Download a Hydrus-backed item to a local temp path (best-effort).
"""Download a Hydrus-backed item to a local temp path (best-effort).
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
if not file_hash:
return None
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (
_extract_hash_from_hydrus_file_url(url) if url else ""
)
if not file_hash:
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith(
"/get_files/file"
) and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
if not access_key or not hydrus_url:
return None
if not access_key or not hydrus_url:
return None
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
headers = {"Hydrus-Client-API-Access-Key": access_key}
download_hydrus_file(file_url, headers, dest, timeout=60.0)
try:
if dest.exists() and dest.is_file():
return dest
except Exception:
return None
return None
headers = {"Hydrus-Client-API-Access-Key": access_key}
download_hydrus_file(file_url, headers, dest, timeout=60.0)
try:
if dest.exists() and dest.is_file():
return dest
except Exception:
return None
return None
def _hydrus_archive_temp_dir(config: Dict[str, Any]) -> Path:
    """Return the ``archive/hydrus`` download directory for this config.

    The base is ``config["temp"]`` (user-expanded) when set, otherwise
    ``<system temp>/Medios-Macina``. The directory is not created here.
    """
    try:
        tmp_base = config.get("temp") if isinstance(config, dict) else None
    except Exception:
        tmp_base = None
    if tmp_base:
        root = Path(str(tmp_base)).expanduser()
    else:
        root = Path(tempfile.gettempdir()) / "Medios-Macina"
    return root / "archive" / "hydrus"


def _resolve_existing_or_fetch_path(
    item: Any, config: Dict[str, Any]
) -> tuple[Path | None, Path | None]:
    """Locate a local file for *item*, downloading it if necessary.

    Three strategies are tried in order: the item's own path/target, the
    file returned by its store backend, and finally a direct Hydrus
    download.

    Returns:
        ``(path, temp_path)``. ``temp_path`` equals ``path`` when the file
        was downloaded by this call and is ``None`` for pre-existing files.
        ``(None, None)`` means nothing could be resolved.
    """
    # 1) Direct local path
    try:
        pipe_obj = coerce_to_pipe_object(item, None)
        raw_path = (
            getattr(pipe_obj, "path", None)
            or getattr(pipe_obj, "target", None)
            or sh.get_pipe_object_path(item)
        )
        if raw_path:
            local = Path(str(raw_path)).expanduser()
            if local.exists():
                return local, None
    except Exception:
        pass

    # 2) Store-backed path
    file_hash = _extract_sha256_hex(item)
    store_name = _extract_store_name(item)
    if file_hash and store_name:
        try:
            from Store import Store

            src = Store(config)[store_name].get_file(file_hash)
            if isinstance(src, Path):
                if src.exists():
                    return src, None
            elif isinstance(src, str) and src.strip():
                candidate = Path(src).expanduser()
                if candidate.exists():
                    return candidate, None
                # If the backend returns a URL (HydrusNetwork), download it.
                if src.strip().lower().startswith(("http://", "https://")):
                    fetched = _maybe_download_hydrus_item(
                        {"hash": file_hash, "store": store_name, "url": src.strip()},
                        config,
                        _hydrus_archive_temp_dir(config),
                    )
                    if fetched is not None:
                        return fetched, fetched
        except Exception:
            pass

    # 3) Hydrus-backed items without backend.get_file path.
    fetched = _maybe_download_hydrus_item(item, config, _hydrus_archive_temp_dir(config))
    if fetched is not None:
        return fetched, fetched

    return None, None
def _unique_arcname(name: str, seen: Set[str]) -> str:
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Archive every piped item into a single ``.tar.zst`` and emit it.

    Args:
        result: Piped input: a list of items, a single item, or ``None``.
        args: Raw cmdlet arguments; ``-level`` selects the zstd level.
        config: CLI configuration, forwarded to the path-resolution helpers.

    Returns:
        ``0`` when the archive was written and emitted downstream, ``1`` when
        there was no input, nothing could be resolved to a path, the
        ``zstandard`` package is missing, the output directory could not be
        created, or nothing could be written to the archive.
    """
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    parsed = parse_cmdlet_args(args, CMDLET)

    # Compression level: default 11, unparsable values fall back to 11,
    # and the result is clamped to the 1..22 range.
    level_raw = parsed.get("level")
    try:
        level = int(level_raw) if level_raw is not None else 11
    except Exception:
        level = 11
    level = max(1, min(22, level))

    # Output destination is controlled by the shared -path behavior in the pipeline runner.
    # This cmdlet always creates the archive in the configured output directory and emits it.

    # Collect piped items; archive-file is a batch command (single output).
    if isinstance(result, list):
        items: List[Any] = list(result)
    elif result is not None:
        items = [result]
    else:
        items = []

    if not items:
        log("No piped items provided to archive-file", file=sys.stderr)
        return 1

    temp_downloads: List[Path] = []
    try:
        paths: List[Path] = []
        for it in items:
            p, tmp = _resolve_existing_or_fetch_path(it, config)
            if p is None:
                continue
            paths.append(p)
            if tmp is not None:
                # Remember fetched copies so the finally-block can remove them.
                temp_downloads.append(tmp)

        # Keep stable order, remove duplicates.
        uniq: List[Path] = []
        seen_paths: Set[str] = set()
        for p in paths:
            key = str(p.resolve()) if p.exists() else str(p)
            if key in seen_paths:
                continue
            seen_paths.add(key)
            uniq.append(p)
        paths = uniq

        if not paths:
            log("No existing file paths found in piped items", file=sys.stderr)
            return 1

        out_dir = resolve_output_dir(config)
        try:
            # Best-effort: a real failure is reported by the checked mkdir below.
            out_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass

        stamp = time.strftime("%Y%m%d_%H%M%S")
        out_path = out_dir / f"archive_{stamp}.tar.zst"
        try:
            out_path = sh._unique_destination_path(out_path)  # type: ignore[attr-defined]
        except Exception:
            pass

        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
            return 1

        # Import zstandard lazily so the rest of the CLI still runs without it.
        try:
            import zstandard as zstd  # type: ignore
        except Exception:
            log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
            return 1

        def _discard_partial_archive() -> None:
            # Never leave a truncated or empty archive behind after a failure.
            try:
                out_path.unlink()
            except OSError:
                pass

        # Write tar stream into zstd stream.
        added: List[Path] = []
        try:
            with open(out_path, "wb") as out_handle:
                cctx = zstd.ZstdCompressor(level=level)
                with cctx.stream_writer(out_handle) as compressor:
                    with tarfile.open(
                        fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT
                    ) as tf:
                        seen_names: Set[str] = set()
                        for p in paths:
                            arcname = _unique_arcname(p.name, seen_names)
                            # For directories, tarfile will include contents when recursive=True.
                            try:
                                tf.add(str(p), arcname=arcname, recursive=True)
                            except Exception as exc:
                                # One unreadable source should not abort the whole batch.
                                log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
                            else:
                                added.append(p)
        except Exception as exc:
            log(f"Archive creation failed: {exc}", file=sys.stderr)
            _discard_partial_archive()
            return 1

        if not added:
            # Every add failed: emitting an empty archive would look like success.
            log("No files could be added to the archive", file=sys.stderr)
            _discard_partial_archive()
            return 1

        # Emit a single artifact downstream.
        hash_value = None
        try:
            from SYS.utils import sha256_file

            hash_value = sha256_file(out_path)
        except Exception:
            # The hash is optional metadata; emit the archive without it.
            hash_value = None

        pipe_obj = create_pipe_object_result(
            source="archive",
            identifier=out_path.stem,
            file_path=str(out_path),
            cmdlet_name="archive-file",
            title=out_path.name,
            hash_value=hash_value,
            is_temp=True,
            store="PATH",
            extra={
                "target": str(out_path),
                "archive_format": "tar.zst",
                "compression": "zstd",
                "level": level,
                # Report only the sources that actually made it into the archive.
                "source_count": len(added),
                "source_paths": [str(p) for p in added],
            },
        )
        ctx.emit(pipe_obj)
        return 0
    finally:
        # Best-effort cleanup of any temp Hydrus downloads we created.
        for tmp in temp_downloads:
            try:
                tmp.unlink(missing_ok=True)  # type: ignore[arg-type]
            except TypeError:
                # Path.unlink() only accepts missing_ok on Python 3.8+.
                try:
                    if tmp.exists():
                        tmp.unlink()
                except Exception:
                    pass
            except Exception:
                pass
# Cmdlet definition for archive-file; `_run` is attached as its executor below.
CMDLET = Cmdlet(
    name="archive-file",
    summary="Archive piped files into a single .tar.zst.",
    usage="@N | archive-file [-level <1-22>] [-path <path>]",
    arg=[
        CmdletArg(
            "-level", type="integer", description="Zstandard compression level (default: 11)."
        ),
        SharedArgs.PATH,
    ],
    detail=[
        "- Example: @1-5 | archive-file",
        "- Default zstd level is 11.",
        "- Emits one output item (the archive) for downstream piping.",
    ],
)
CMDLET.exec = _run
+71 -28
View File
@@ -1,4 +1,5 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence
@@ -23,12 +24,16 @@ class Delete_File(sh.Cmdlet):
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-query \"hash:<sha256>\"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
usage='delete-file [-query "hash:<sha256>"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]',
alias=["del-file"],
arg=[
sh.SharedArgs.QUERY,
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg(
"conserve", description="Choose which copy to keep: 'local' or 'hydrus'."
),
sh.CmdletArg(
"lib-root", description="Path to local library root for database cleanup."
),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
@@ -62,7 +67,11 @@ class Delete_File(sh.Cmdlet):
title_val = item.get("title") or item.get("name")
else:
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
target = (
sh.get_field(item, "target")
or sh.get_field(item, "file_path")
or sh.get_field(item, "path")
)
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
def _get_ext_from_item() -> str:
@@ -102,7 +111,7 @@ class Delete_File(sh.Cmdlet):
pass
return ""
store = None
if isinstance(item, dict):
store = item.get("store")
@@ -133,19 +142,29 @@ class Delete_File(sh.Cmdlet):
is_hydrus_store = False
# Backwards-compatible fallback heuristic (older items might only carry a name).
if (not is_hydrus_store) and bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"}):
if (
(not is_hydrus_store)
and bool(store_lower)
and ("hydrus" in store_lower or store_lower in {"home", "work"})
):
is_hydrus_store = True
store_label = str(store) if store else "default"
hydrus_prefix = f"[hydrusnetwork:{store_label}]"
# For Hydrus files, the target IS the hash
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
hash_hex = (
sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
local_target = (
isinstance(target, str)
and target.strip()
and not str(target).lower().startswith(("http://", "https://"))
)
deleted_rows: List[Dict[str, Any]] = []
# If this item references a configured non-Hydrus store backend, prefer deleting
@@ -169,11 +188,15 @@ class Delete_File(sh.Cmdlet):
try:
if hash_candidate and hasattr(backend, "get_file"):
candidate_path = backend.get_file(hash_candidate)
resolved_path = candidate_path if isinstance(candidate_path, Path) else None
resolved_path = (
candidate_path if isinstance(candidate_path, Path) else None
)
except Exception:
resolved_path = None
identifier = hash_candidate or (str(target).strip() if isinstance(target, str) else "")
identifier = hash_candidate or (
str(target).strip() if isinstance(target, str) else ""
)
if identifier:
deleter = getattr(backend, "delete_file", None)
if callable(deleter) and bool(deleter(identifier)):
@@ -181,18 +204,27 @@ class Delete_File(sh.Cmdlet):
size_bytes: int | None = None
try:
if resolved_path is not None and isinstance(resolved_path, Path) and resolved_path.exists():
if (
resolved_path is not None
and isinstance(resolved_path, Path)
and resolved_path.exists()
):
size_bytes = int(resolved_path.stat().st_size)
except Exception:
size_bytes = None
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else (resolved_path.name if resolved_path else identifier),
"title": (
str(title_val).strip()
if title_val
else (resolved_path.name if resolved_path else identifier)
),
"store": store_label,
"hash": hash_candidate or (hash_hex or ""),
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
"ext": _get_ext_from_item()
or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
}
)
@@ -216,7 +248,7 @@ class Delete_File(sh.Cmdlet):
local_target = False
except Exception:
pass
if conserve != "local" and local_target:
path = Path(str(target))
size_bytes: int | None = None
@@ -225,7 +257,7 @@ class Delete_File(sh.Cmdlet):
size_bytes = int(path.stat().st_size)
except Exception:
size_bytes = None
# If lib_root is provided and this is from a folder store, use the Folder class
if lib_root:
try:
@@ -276,7 +308,7 @@ class Delete_File(sh.Cmdlet):
)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
# Remove common sidecars regardless of file removal success
for sidecar in (
path.with_suffix(".tag"),
@@ -291,11 +323,11 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = False
should_try_hydrus = is_hydrus_store
# If conserve is set to hydrus, definitely don't delete
if conserve == "hydrus":
should_try_hydrus = False
if should_try_hydrus and hash_hex:
# Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
# This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
@@ -312,7 +344,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
else:
@@ -328,7 +363,10 @@ class Delete_File(sh.Cmdlet):
client = candidate
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
log(
f"Hydrus client unavailable for store '{store}': {exc}",
file=sys.stderr,
)
return False
if client is None:
if not local_deleted:
@@ -365,7 +403,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
except Exception:
@@ -411,7 +452,7 @@ class Delete_File(sh.Cmdlet):
lib_root: str | None = None
reason_tokens: list[str] = []
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
@@ -460,7 +501,7 @@ class Delete_File(sh.Cmdlet):
items = result
elif result:
items = [result]
if not items:
log("No items to delete", file=sys.stderr)
return 1
@@ -468,7 +509,9 @@ class Delete_File(sh.Cmdlet):
success_count = 0
deleted_rows: List[Dict[str, Any]] = []
for item in items:
rows = self._process_single_item(item, override_hash, conserve, lib_root, reason, config)
rows = self._process_single_item(
item, override_hash, conserve, lib_root, reason, config
)
if rows:
success_count += 1
deleted_rows.extend(rows)
@@ -481,7 +524,9 @@ class Delete_File(sh.Cmdlet):
result_row.add_column("Title", row.get("title", ""))
result_row.add_column("Store", row.get("store", ""))
result_row.add_column("Hash", row.get("hash", ""))
result_row.add_column("Size", _format_size(row.get("size_bytes"), integer_only=False))
result_row.add_column(
"Size", _format_size(row.get("size_bytes"), integer_only=False)
)
result_row.add_column("Ext", row.get("ext", ""))
# Display-only: print directly and do not affect selection/history.
@@ -504,5 +549,3 @@ class Delete_File(sh.Cmdlet):
# Instantiate and register the cmdlet
Delete_File()
+26 -8
View File
@@ -26,12 +26,14 @@ class Delete_Note(Cmdlet):
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-query \"hash:<sha256>\"] <name>",
usage='delete-note -store <store> [-query "hash:<sha256>"] <name>',
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
CmdletArg(
"name", type="string", required=True, description="The note name/key to delete."
),
],
detail=[
"- Deletes the named note from the selected store backend.",
@@ -44,7 +46,9 @@ class Delete_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +80,10 @@ class Delete_Note(Cmdlet):
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
if not note_name_override and not inferred_note_name:
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
log(
"[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)",
file=sys.stderr,
)
return 1
results = normalize_result_input(result)
@@ -84,7 +91,10 @@ class Delete_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[delete_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -96,9 +106,14 @@ class Delete_Note(Cmdlet):
continue
# Resolve which note name to delete for this item.
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
note_name = (
note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
)
if not note_name:
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
log(
"[delete_note] Error: Missing note name (pass <name> or pipe a note row)",
file=sys.stderr,
)
return 1
store_name = str(store_override or res.get("store") or "").strip()
@@ -106,7 +121,10 @@ class Delete_Note(Cmdlet):
raw_path = res.get("path")
if not store_name:
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
log(
"[delete_note] Error: Missing -store and item has no store field",
file=sys.stderr,
)
return 1
resolved_hash = self._resolve_hash(
+76 -23
View File
@@ -30,7 +30,9 @@ def _extract_hash(item: Any) -> Optional[str]:
return normalize_hash(str(h)) if h else None
def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: Dict[str, Any]) -> None:
def _upsert_relationships(
db: API_folder_store, file_hash: str, relationships: Dict[str, Any]
) -> None:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
@@ -48,7 +50,9 @@ def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: D
)
def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str) -> None:
def _remove_reverse_link(
db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str
) -> None:
meta = db.get_metadata(dst_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
@@ -78,7 +82,12 @@ def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str,
_upsert_relationships(db, dst_hash, rels)
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -129,12 +138,12 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete relationships from files.
Args:
result: Input result(s) from previous cmdlet
args: Command arguments
config: CLI configuration
Returns:
Exit code (0 = success)
"""
@@ -153,7 +162,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
raw_path = parsed_args.get("path")
# Normalize input
results = normalize_result_input(result)
@@ -163,7 +172,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
results = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not results:
# Legacy -path mode below may still apply
if raw_path:
@@ -179,9 +188,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected in pipeline; use -store to choose one", file=sys.stderr)
log(
"Multiple stores detected in pipeline; use -store to choose one",
file=sys.stderr,
)
return 1
deleted_count = 0
# STORE/HASH FIRST: folder-store DB deletion (preferred)
@@ -208,7 +220,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Enforce same-store when items carry store info
item_store = get_field(single_result, "store")
if item_store and str(item_store) != str(store_name):
log(f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
file_hash = _extract_hash(single_result)
@@ -225,7 +240,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -query \"hash:<sha256>\" or ensure pipeline includes hash)", file=sys.stderr)
log(
'Could not extract file hash for deletion (use -query "hash:<sha256>" or ensure pipeline includes hash)',
file=sys.stderr,
)
return 1
meta = db.get_metadata(file_hash) or {}
@@ -241,7 +259,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
# delete one type (case-insensitive key match)
@@ -257,13 +280,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
@@ -271,12 +302,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
_refresh_relationship_view_if_current(file_hash, None, None, config)
deleted_count += 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
log(
f"Successfully deleted relationships from {deleted_count} file(s)",
file=sys.stderr,
)
return 0
except Exception as exc:
log(f"Error deleting store relationships: {exc}", file=sys.stderr)
return 1
# LEGACY PATH MODE (single local DB)
# Get storage path
local_storage_path = get_local_storage_path(config)
@@ -330,7 +364,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
key_to_delete: Optional[str] = None
@@ -345,26 +384,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
conn.commit()
_refresh_relationship_view_if_current(file_hash, str(file_path_obj), None, config)
_refresh_relationship_view_if_current(
file_hash, str(file_path_obj), None, config
)
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
return 1
@@ -379,7 +428,11 @@ CMDLET = Cmdlet(
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
CmdletArg(
"type",
type="string",
description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types.",
),
],
detail=[
"- Delete all relationships: pipe files | delete-relationship --all",
+78 -36
View File
@@ -20,7 +20,9 @@ from SYS.logger import debug, log
from Store import Store
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
def _refresh_tag_view_if_current(
file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]
) -> None:
"""If the current subject matches the target, refresh tags via get-tag."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -53,7 +55,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v]
else:
subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)]
subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)]
subj_paths = [
norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)
]
is_match = False
if target_hash and target_hash in subj_hashes:
@@ -76,11 +80,15 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-query \"hash:<sha256>\"] <tag>[,<tag>...]",
usage='delete-tag -store <store> [-query "hash:<sha256>"] <tag>[,<tag>...]',
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
CmdletArg(
"<tag>[,<tag>...]",
required=True,
description="One or more tags to remove. Comma- or space-separated.",
),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -query override.",
@@ -88,28 +96,35 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
def _looks_like_tag_row(obj: Any) -> bool:
if obj is None:
return False
# TagItem (direct) or PipeObject/dict emitted from get-tag table rows.
try:
if hasattr(obj, '__class__') and obj.__class__.__name__ == 'TagItem' and hasattr(obj, 'tag_name'):
if (
hasattr(obj, "__class__")
and obj.__class__.__name__ == "TagItem"
and hasattr(obj, "tag_name")
):
return True
except Exception:
pass
try:
return bool(get_field(obj, 'tag_name'))
return bool(get_field(obj, "tag_name"))
except Exception:
return False
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
has_piped_tag_list = (
isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
)
# Parse -query/-store overrides and collect remaining args.
override_query: str | None = None
@@ -151,8 +166,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
grouped_table = ""
grouped_tags = get_field(result, "tag") if result is not None else None
tags_arg = parse_tag_arguments(rest)
if grouped_table == "tag.selection" and isinstance(grouped_tags, list) and grouped_tags and not tags_arg:
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash"))
if (
grouped_table == "tag.selection"
and isinstance(grouped_tags, list)
and grouped_tags
and not tags_arg
):
file_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(result, "hash"))
)
store_name = override_store or get_field(result, "store")
path = get_field(result, "path") or get_field(result, "target")
tags = [str(t) for t in grouped_tags if t]
@@ -161,7 +185,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not tags_arg and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Normalize result to a list for processing
items_to_process = []
if isinstance(result, list):
@@ -171,13 +195,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Process each item
success_count = 0
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
# Check if we are in "delete selected tags" mode (tag rows)
is_tag_item_mode = bool(items_to_process) and _looks_like_tag_row(items_to_process[0])
if is_tag_item_mode:
# Collect all tags to delete from the TagItems and batch per file.
# This keeps delete-tag efficient (one backend call per file).
@@ -186,7 +210,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag_name = get_field(item, "tag_name")
if not tag_name:
continue
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_store = override_store or get_field(item, "store")
item_path = get_field(item, "path") or get_field(item, "target")
key = (str(item_hash or ""), str(item_store or ""), str(item_path or ""))
@@ -202,32 +230,33 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# "Delete tags from files" mode
# We need args (tags to delete)
if not tags_arg:
log("Requires at least one tag argument when deleting from files")
return 1
log("Requires at least one tag argument when deleting from files")
return 1
# Process each item
# If we have tags from @ syntax (e.g. delete-tag @{1,2}), we ignore the piped result for tag selection
# but we might need the piped result for the file context if @ selection was from a Tag table
# Actually, the @ selection logic above already extracted tags.
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
for item in items_to_process:
tags_to_delete: list[str] = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_path = get_field(item, "path") or get_field(item, "target")
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
if tags_arg:
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
tag_name = get_field(item, "tag_name")
if tag_name:
tags_to_delete = [str(tag_name)]
else:
@@ -244,12 +273,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
return 1
def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool:
def _process_deletion(
tags: list[str],
file_hash: str | None,
path: str | None,
store_name: str | None,
config: Dict[str, Any],
) -> bool:
"""Helper to execute the deletion logic for a single target."""
if not tags:
return False
if not store_name:
log("Store is required (use -store or pipe a result with store)", file=sys.stderr)
return False
@@ -258,12 +294,16 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
if not resolved_hash and path:
try:
from SYS.utils import sha256_file
resolved_hash = sha256_file(Path(path))
except Exception:
resolved_hash = None
if not resolved_hash:
log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr)
log(
"Item does not include a usable hash (and hash could not be derived from path)",
file=sys.stderr,
)
return False
def _fetch_existing_tags() -> list[str]:
@@ -278,18 +318,23 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
if title_tags:
existing_tags = _fetch_existing_tags()
current_titles = [t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")]
current_titles = [
t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")
]
del_title_set = {t.lower() for t in title_tags}
remaining_titles = [t for t in current_titles if t.lower() not in del_title_set]
if current_titles and not remaining_titles:
log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr)
log(
'Cannot delete the last title: tag. Add a replacement title first (add-tags "title:new title").',
file=sys.stderr,
)
return False
try:
backend = Store(config)[store_name]
ok = backend.delete_tag(resolved_hash, list(tags), config=config)
if ok:
preview = resolved_hash[:12] + ('' if len(resolved_hash) > 12 else '')
preview = resolved_hash[:12] + ("" if len(resolved_hash) > 12 else "")
debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.")
_refresh_tag_view_if_current(resolved_hash, store_name, path, config)
return True
@@ -302,6 +347,3 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
+58 -20
View File
@@ -29,7 +29,11 @@ class Delete_Url(Cmdlet):
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
CmdletArg(
"url",
required=False,
description="URL to remove (optional when piping url rows)",
),
],
detail=[
"- Removes URL association from file identified by hash+store",
@@ -38,7 +42,7 @@ class Delete_Url(Cmdlet):
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
@@ -47,9 +51,11 @@ class Delete_Url(Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -57,18 +63,20 @@ class Delete_Url(Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = normalize_hash(file_hash)
@@ -87,7 +95,7 @@ class Delete_Url(Cmdlet):
return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
urls_from_cli = _urls_from_arg(url_arg)
# Get backend and delete url
try:
storage = Store(config)
@@ -129,21 +137,29 @@ class Delete_Url(Cmdlet):
raw_hash = query_hash or get_field(item, "hash")
raw_store = store_override or get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item missing hash/store; skipping",
file=sys.stderr,
)
continue
normalized = normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[delete-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -152,9 +168,17 @@ class Delete_Url(Cmdlet):
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
item_urls = list(urls_from_cli)
if not item_urls:
item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
item_urls = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
if not item_urls:
ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has no url field; skipping", file=sys.stderr
)
continue
batch.setdefault(store_text, []).append((normalized, item_urls))
@@ -168,7 +192,7 @@ class Delete_Url(Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = [(h, merged[h]) for h in merged.keys()]
@@ -195,27 +219,41 @@ class Delete_Url(Cmdlet):
# remove the piped url row(s).
remove_set = urls_from_cli
if not remove_set:
remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
remove_set = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
_set_item_url(item, _remove_urls(existing, list(remove_set)))
ctx.emit(item)
return 0
# Single-item mode
if not urls_from_cli:
urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
urls_from_cli = [
u.strip()
for u in normalize_urls(
get_field(result, "url") or get_field(result, "source_url")
)
if str(u).strip()
]
if not urls_from_cli:
log("Error: No URL provided")
return 1
backend = storage[str(store_name)]
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
ctx.print_if_visible(
f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr
)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
ctx.emit(result)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
+155 -40
View File
@@ -44,8 +44,12 @@ class Download_File(Cmdlet):
SharedArgs.URL,
SharedArgs.PATH,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
CmdletArg(
name="-output",
type="string",
alias="o",
description="(deprecated) Output directory (use -path instead)",
),
],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
@@ -67,7 +71,7 @@ class Download_File(Cmdlet):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in (raw_url or []):
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
@@ -99,7 +103,9 @@ class Download_File(Cmdlet):
return 1
@staticmethod
def _build_preview(raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[Any]:
def _build_preview(
raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int
) -> List[Any]:
try:
preview: List[Any] = []
preview.extend(list(raw_urls or [])[: max(0, total_items)])
@@ -212,7 +218,11 @@ class Download_File(Cmdlet):
title = str(get_field(item, "title") or "").strip() if item is not None else ""
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
table_title = (
f"Internet Archive: {title}".strip().rstrip(":")
if title
else f"Internet Archive: {identifier}"
)
try:
from result_table import ResultTable
@@ -329,7 +339,9 @@ class Download_File(Cmdlet):
config: Dict[str, Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
title_val = (
title_hint or downloaded_path.stem or "Unknown"
).strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
tag: List[str] = []
if tags_hint:
@@ -406,9 +418,13 @@ class Download_File(Cmdlet):
provider = _get_provider("telegram", config)
if provider is None:
raise DownloadError("Telegram provider not configured or not available (check telethon/app_id/api_hash)")
raise DownloadError(
"Telegram provider not configured or not available (check telethon/app_id/api_hash)"
)
sr = SearchResult(table="telegram", title=str(url), path=str(url), full_metadata={})
sr = SearchResult(
table="telegram", title=str(url), path=str(url), full_metadata={}
)
downloaded_path = None
telegram_info: Optional[Dict[str, Any]] = None
if hasattr(provider, "download_url"):
@@ -428,9 +444,15 @@ class Download_File(Cmdlet):
try:
chat_info_raw = telegram_info.get("chat")
msg_info_raw = telegram_info.get("message")
chat_info: Dict[str, Any] = chat_info_raw if isinstance(chat_info_raw, dict) else {}
msg_info: Dict[str, Any] = msg_info_raw if isinstance(msg_info_raw, dict) else {}
channel = str(chat_info.get("title") or chat_info.get("username") or "").strip()
chat_info: Dict[str, Any] = (
chat_info_raw if isinstance(chat_info_raw, dict) else {}
)
msg_info: Dict[str, Any] = (
msg_info_raw if isinstance(msg_info_raw, dict) else {}
)
channel = str(
chat_info.get("title") or chat_info.get("username") or ""
).strip()
post = msg_info.get("id")
except Exception:
channel = ""
@@ -479,7 +501,16 @@ class Download_File(Cmdlet):
p = urlparse(str(url))
h = (p.hostname or "").strip().lower()
path = (p.path or "").strip().lower()
if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")):
if "libgen" in h and any(
x in path
for x in (
"/edition.php",
"/file.php",
"/ads.php",
"/get.php",
"/series.php",
)
):
provider_name = "libgen"
except Exception:
pass
@@ -489,7 +520,9 @@ class Download_File(Cmdlet):
if provider_name == "openlibrary":
provider = get_provider("openlibrary", config)
if provider is None:
raise DownloadError("OpenLibrary provider not configured or not available")
raise DownloadError(
"OpenLibrary provider not configured or not available"
)
edition_id = self._openlibrary_edition_id_from_url(str(url))
title_hint = self._title_hint_from_url_slug(str(url))
@@ -512,7 +545,9 @@ class Download_File(Cmdlet):
# High-level steps for OpenLibrary borrow/download flow.
progress.begin_steps(5)
def _progress(kind: str, done: int, total: Optional[int], label: str) -> None:
def _progress(
kind: str, done: int, total: Optional[int], label: str
) -> None:
# kind:
# - "step": advance step text
# - "pages": update pipe percent/status
@@ -525,7 +560,9 @@ class Download_File(Cmdlet):
t = int(total) if isinstance(total, int) else 0
d = int(done) if isinstance(done, int) else 0
if t > 0:
pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
pct = int(
round((max(0, min(d, t)) / max(1, t)) * 100.0)
)
progress.set_percent(pct)
progress.set_status(f"downloading pages {d}/{t}")
else:
@@ -538,9 +575,15 @@ class Download_File(Cmdlet):
except Exception:
lbl = "download"
progress.begin_transfer(label=lbl, total=total)
progress.update_transfer(label=lbl, completed=done, total=total)
progress.update_transfer(
label=lbl, completed=done, total=total
)
try:
if isinstance(total, int) and total > 0 and int(done) >= int(total):
if (
isinstance(total, int)
and total > 0
and int(done) >= int(total)
):
progress.finish_transfer(label=lbl)
except Exception:
pass
@@ -590,12 +633,18 @@ class Download_File(Cmdlet):
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
if callable(exec_fn):
ret = exec_fn(None, ["-provider", "libgen", "-query", fallback_query], config)
ret = exec_fn(
None,
["-provider", "libgen", "-query", fallback_query],
config,
)
try:
table = pipeline_context.get_last_result_table()
items = pipeline_context.get_last_result_items()
if table is not None:
pipeline_context.set_last_result_table_overlay(table, items)
pipeline_context.set_last_result_table_overlay(
table, items
)
except Exception:
pass
@@ -606,7 +655,10 @@ class Download_File(Cmdlet):
except Exception:
pass
log("[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr)
log(
"[download-file] OpenLibrary URL could not be downloaded",
file=sys.stderr,
)
continue
# Generic provider URL handler (if a provider implements `download_url`).
@@ -734,11 +786,18 @@ class Download_File(Cmdlet):
full_metadata = get_field(item, "full_metadata")
target = get_field(item, "path") or get_field(item, "url")
if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder":
if (
str(table or "").lower() == "alldebrid"
and str(media_kind or "").lower() == "folder"
):
magnet_id = None
if isinstance(full_metadata, dict):
magnet_id = full_metadata.get("magnet_id")
if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
if (
magnet_id is None
and isinstance(target, str)
and target.lower().startswith("alldebrid:magnet:")
):
try:
magnet_id = int(target.split(":")[-1])
except Exception:
@@ -748,12 +807,20 @@ class Download_File(Cmdlet):
provider = get_search_provider("alldebrid", config)
if provider is not None:
try:
files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)})
files = provider.search(
"*",
limit=10_000,
filters={"view": "files", "magnet_id": int(magnet_id)},
)
except Exception:
files = []
# If the magnet isn't ready, provider.search returns a single not-ready folder row.
if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
if (
files
and len(files) == 1
and getattr(files[0], "media_kind", "") == "folder"
):
detail = getattr(files[0], "detail", "")
log(
f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})",
@@ -761,7 +828,9 @@ class Download_File(Cmdlet):
)
else:
for sr in files:
expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr)
expanded_items.append(
sr.to_dict() if hasattr(sr, "to_dict") else sr
)
continue
expanded_items.append(item)
@@ -784,7 +853,9 @@ class Download_File(Cmdlet):
get_search_provider = registry.get("get_search_provider")
SearchResult = registry.get("SearchResult")
expanded_items = self._expand_provider_items(piped_items=piped_items, registry=registry, config=config)
expanded_items = self._expand_provider_items(
piped_items=piped_items, registry=registry, config=config
)
for item in expanded_items:
try:
@@ -800,7 +871,11 @@ class Download_File(Cmdlet):
tags_list = None
full_metadata = get_field(item, "full_metadata")
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
if (
(not full_metadata)
and isinstance(item, dict)
and isinstance(item.get("extra"), dict)
):
extra_md = item["extra"].get("full_metadata")
if isinstance(extra_md, dict):
full_metadata = extra_md
@@ -832,7 +907,9 @@ class Download_File(Cmdlet):
if isinstance(md, dict):
magnet_name = md.get("magnet_name") or md.get("folder")
if not magnet_name:
magnet_name = str(get_field(item, "detail") or "").strip() or None
magnet_name = (
str(get_field(item, "detail") or "").strip() or None
)
magnet_dir_name = _sf(str(magnet_name)) if magnet_name else ""
@@ -845,7 +922,9 @@ class Download_File(Cmdlet):
base_tail_norm = _sf(base_tail).lower() if base_tail.strip() else ""
magnet_dir_norm = magnet_dir_name.lower() if magnet_dir_name else ""
if magnet_dir_name and (not base_tail_norm or base_tail_norm != magnet_dir_norm):
if magnet_dir_name and (
not base_tail_norm or base_tail_norm != magnet_dir_norm
):
output_dir = Path(output_dir) / magnet_dir_name
relpath = None
@@ -855,7 +934,11 @@ class Download_File(Cmdlet):
relpath = md["file"].get("_relpath")
if relpath:
parts = [p for p in str(relpath).replace("\\", "/").split("/") if p and p not in {".", ".."}]
parts = [
p
for p in str(relpath).replace("\\", "/").split("/")
if p and p not in {".", ".."}
]
# If the provider relpath already includes the magnet folder name as a
# root directory (common), strip it to prevent double nesting.
@@ -881,7 +964,11 @@ class Download_File(Cmdlet):
provider_sr = sr
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
if (
downloaded_path is None
and attempted_provider_download
and str(table or "").lower() == "openlibrary"
):
availability = None
reason = None
if isinstance(full_metadata, dict):
@@ -898,7 +985,10 @@ class Download_File(Cmdlet):
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
fallback_query = title_text
@@ -921,7 +1011,9 @@ class Download_File(Cmdlet):
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()
if table_obj is not None:
pipeline_context.set_last_result_table_overlay(table_obj, items_obj)
pipeline_context.set_last_result_table_overlay(
table_obj, items_obj
)
except Exception:
pass
@@ -935,7 +1027,11 @@ class Download_File(Cmdlet):
continue
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
if (
downloaded_path is None
and isinstance(target, str)
and target.startswith("http")
):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
@@ -946,7 +1042,9 @@ class Download_File(Cmdlet):
file=sys.stderr,
)
continue
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
@@ -958,7 +1056,10 @@ class Download_File(Cmdlet):
downloaded_path = self._path_from_download_result(result_obj)
if downloaded_path is None:
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
log(
f"Cannot download item (no provider handler / unsupported target): {title or target}",
file=sys.stderr,
)
continue
# Allow providers to add/enrich tags and metadata during download.
@@ -1038,11 +1139,19 @@ class Download_File(Cmdlet):
# UX: In piped mode, allow a single positional arg to be the destination directory.
# Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo"
if had_piped_input and raw_url and len(raw_url) == 1 and (not parsed.get("path")) and (not parsed.get("output")):
if (
had_piped_input
and raw_url
and len(raw_url) == 1
and (not parsed.get("path"))
and (not parsed.get("output"))
):
candidate = str(raw_url[0] or "").strip()
low = candidate.lower()
looks_like_url = low.startswith(("http://", "https://", "ftp://"))
looks_like_provider = low.startswith(("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:"))
looks_like_provider = low.startswith(
("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")
)
looks_like_windows_path = (
(len(candidate) >= 2 and candidate[1] == ":")
or candidate.startswith("\\\\")
@@ -1058,7 +1167,9 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
quiet_mode = (
bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
)
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
@@ -1082,7 +1193,9 @@ class Download_File(Cmdlet):
total_items = self._safe_total_items(raw_url, piped_items)
preview = self._build_preview(raw_url, piped_items, total_items)
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
progress.ensure_local_ui(
label="download-file", total_items=total_items, items_preview=preview
)
registry = self._load_provider_registry()
@@ -1155,6 +1268,7 @@ class Download_File(Cmdlet):
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
@@ -1173,6 +1287,7 @@ class Download_File(Cmdlet):
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
+363 -133
View File
File diff suppressed because it is too large Load Diff
+49 -18
View File
@@ -17,6 +17,7 @@ from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from . import _shared as sh
class Download_Torrent(sh.Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
@@ -27,10 +28,29 @@ class Download_Torrent(sh.Cmdlet):
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
sh.CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
sh.CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
sh.CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
sh.CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
sh.CmdletArg(
name="magnet",
type="string",
required=False,
description="Magnet link or .torrent file/URL",
variadic=True,
),
sh.CmdletArg(
name="output",
type="string",
description="Output directory for downloaded files",
),
sh.CmdletArg(
name="wait",
type="float",
description="Wait time (seconds) for magnet processing timeout",
),
sh.CmdletArg(
name="background",
type="flag",
alias="bg",
description="Start download in background",
),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
@@ -51,7 +71,10 @@ class Download_Torrent(sh.Cmdlet):
except Exception:
api_key = None
if not api_key:
log("AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", file=sys.stderr)
log(
"AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)",
file=sys.stderr,
)
return 1
for magnet_url in magnet_args:
if background_mode:
@@ -64,7 +87,9 @@ class Download_Torrent(sh.Cmdlet):
magnet_id = self._submit_magnet(worker_id, magnet_url, api_key)
if magnet_id <= 0:
continue
self._start_background_magnet_worker(worker_id, magnet_id, output_dir, api_key, wait_timeout)
self._start_background_magnet_worker(
worker_id, magnet_id, output_dir, api_key, wait_timeout
)
log(f"⧗ Torrent processing started (ID: {magnet_id})")
return 0
@@ -80,7 +105,7 @@ class Download_Torrent(sh.Cmdlet):
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return 0
@@ -90,7 +115,9 @@ class Download_Torrent(sh.Cmdlet):
log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr)
return 0
def _start_background_magnet_worker(self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int) -> None:
def _start_background_magnet_worker(
self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int
) -> None:
thread = threading.Thread(
target=self._download_magnet_worker,
args=(worker_id, magnet_id, output_dir, api_key, wait_timeout),
@@ -119,7 +146,7 @@ class Download_Torrent(sh.Cmdlet):
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -129,13 +156,13 @@ class Download_Torrent(sh.Cmdlet):
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url and file_name:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -154,20 +181,22 @@ class Download_Torrent(sh.Cmdlet):
) -> None:
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
# Poll for ready status (simplified)
import time
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -176,13 +205,13 @@ class Download_Torrent(sh.Cmdlet):
return
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -193,8 +222,9 @@ class Download_Torrent(sh.Cmdlet):
def _download_file(url: str, dest: Path) -> None:
try:
import requests
resp = requests.get(url, stream=True)
with open(dest, 'wb') as f:
with open(dest, "wb") as f:
for chunk in resp.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
@@ -211,4 +241,5 @@ class Download_Torrent(sh.Cmdlet):
)
thread.start()
CMDLET = Download_Torrent()
+90 -60
View File
@@ -24,7 +24,7 @@ from config import resolve_output_dir
class Get_File(sh.Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
"""Initialize get-file cmdlet."""
super().__init__(
@@ -39,13 +39,13 @@ class Get_File(sh.Cmdlet):
],
detail=[
"- Exports file from storage backend to local path",
"- Uses selected item's hash, or -query \"hash:<sha256>\"",
'- Uses selected item\'s hash, or -query "hash:<sha256>"',
"- Preserves file extension and metadata",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend."""
debug(f"[get-file] run() called with result type: {type(result)}")
@@ -56,43 +56,45 @@ class Get_File(sh.Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
debug(f"[get-file] file_hash={file_hash} store_name={store_name}")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
debug(f"[get-file] Getting storage backend: {store_name}")
# Get storage backend
store = Store(config)
backend = store[store_name]
debug(f"[get-file] Backend retrieved: {type(backend).__name__}")
# Get file metadata to determine name and extension
debug(f"[get-file] Getting metadata for hash...")
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"Error: File metadata not found for hash {file_hash}")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
debug(
f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}"
)
def resolve_display_title() -> str:
candidates = [
@@ -110,16 +112,18 @@ class Get_File(sh.Cmdlet):
if text:
return text
return ""
debug(f"[get-file] Calling backend.get_file({file_hash})")
# Get file from backend (may return Path or URL string depending on backend)
source_path = backend.get_file(file_hash)
debug(f"[get-file] backend.get_file returned: {source_path}")
# Check if backend returned a URL (HydrusNetwork case)
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
if isinstance(source_path, str) and (
source_path.startswith("http://") or source_path.startswith("https://")
):
# Hydrus backend returns a URL; open it only for this explicit user action.
try:
webbrowser.open(source_path)
@@ -127,20 +131,22 @@ class Get_File(sh.Cmdlet):
log(f"Error opening browser: {exc}", file=sys.stderr)
else:
debug(f"Opened in browser: {source_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": resolve_display_title() or "Opened",
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": resolve_display_title() or "Opened",
}
)
return 0
# Otherwise treat as file path (local/folder backends)
if isinstance(source_path, str):
source_path = Path(source_path)
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash}")
return 1
@@ -154,13 +160,15 @@ class Get_File(sh.Cmdlet):
ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".")
self._open_file_default(source_path)
log(f"Opened: {source_path}", file=sys.stderr)
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
}
)
debug("[get-file] Completed successfully")
return 0
@@ -177,32 +185,38 @@ class Get_File(sh.Cmdlet):
if output_name:
filename = output_name
else:
title = (metadata.get("title") if isinstance(metadata, dict) else None) or resolve_display_title() or "export"
title = (
(metadata.get("title") if isinstance(metadata, dict) else None)
or resolve_display_title()
or "export"
)
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
if not ext.startswith("."):
ext = "." + ext
filename += ext
dest_path = self._unique_path(output_dir / filename)
# Copy file to destination
debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
shutil.copy2(source_path, dest_path)
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
}
)
debug(f"[get-file] Completed successfully")
return 0
@@ -216,7 +230,17 @@ class Get_File(sh.Cmdlet):
if self._open_local_file_in_browser_via_http(path):
return
if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tif", ".tiff", ".svg"}:
if suffix in {
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tif",
".tiff",
".svg",
}:
# Use default web browser for images.
if self._open_image_in_default_browser(path):
return
@@ -225,9 +249,13 @@ class Get_File(sh.Cmdlet):
os.startfile(str(path)) # type: ignore[attr-defined]
return
if sys.platform == "darwin":
subprocess.Popen(["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
return
subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
@@ -282,7 +310,9 @@ class Get_File(sh.Cmdlet):
url = f"http://127.0.0.1:{port}/{quote(filename)}"
# Run server in the background.
server_thread = threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True)
server_thread = threading.Thread(
target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True
)
server_thread.start()
# Auto-shutdown after a timeout to avoid lingering servers.
@@ -326,10 +356,10 @@ class Get_File(sh.Cmdlet):
"\n".join(
[
"<!doctype html>",
"<meta charset=\"utf-8\">",
'<meta charset="utf-8">',
f"<title>{resolved.name}</title>",
"<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
f"<img src=\"{image_url}\" alt=\"{resolved.name}\">",
f'<img src="{image_url}" alt="{resolved.name}">',
]
),
encoding="utf-8",
@@ -346,29 +376,29 @@ class Get_File(sh.Cmdlet):
return bool(webbrowser.open(wrapper_url))
except Exception:
return False
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""
allowed_chars = []
for ch in str(name):
if ch.isalnum() or ch in {'-', '_', ' ', '.'}:
if ch.isalnum() or ch in {"-", "_", " ", "."}:
allowed_chars.append(ch)
else:
allowed_chars.append(' ')
allowed_chars.append(" ")
# Collapse multiple spaces
sanitized = ' '.join(''.join(allowed_chars).split())
sanitized = " ".join("".join(allowed_chars).split())
return sanitized or "export"
def _unique_path(self, path: Path) -> Path:
"""Generate unique path by adding (1), (2), etc. if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem} ({counter}){suffix}"
+39 -22
View File
@@ -26,7 +26,7 @@ class Get_Metadata(Cmdlet):
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-query \"hash:<sha256>\"] [-store <backend>]",
usage='get-metadata [-query "hash:<sha256>"] [-store <backend>]',
alias=["meta"],
arg=[
SharedArgs.QUERY,
@@ -52,15 +52,16 @@ class Get_Metadata(Cmdlet):
explicit = meta.get("time_imported")
if isinstance(explicit, (int, float)):
return int(explicit)
# Try parsing string timestamps
if isinstance(explicit, str):
try:
import datetime as _dt
return int(_dt.datetime.fromisoformat(explicit).timestamp())
except Exception:
pass
return None
@staticmethod
@@ -70,14 +71,24 @@ class Get_Metadata(Cmdlet):
return ""
try:
import datetime as _dt
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return ""
@staticmethod
def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int],
dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str],
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
def _build_table_row(
title: str,
store: str,
path: str,
mime: str,
size_bytes: Optional[int],
dur_seconds: Optional[int],
imported_ts: Optional[int],
url: list[str],
hash_value: Optional[str],
pages: Optional[int] = None,
) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
size_int: Optional[int] = None
@@ -156,34 +167,38 @@ class Get_Metadata(Cmdlet):
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
# Get hash and store from parsed args or result
file_hash = query_hash or get_field(result, "hash")
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
if not storage_source:
log("No storage backend specified - use -store to specify", file=sys.stderr)
return 1
# Use storage backend to get metadata
try:
from Store import Store
storage = Store(config)
backend = storage[storage_source]
# Get metadata from backend
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"No metadata found for hash {file_hash[:8]}... in {storage_source}", file=sys.stderr)
log(
f"No metadata found for hash {file_hash[:8]}... in {storage_source}",
file=sys.stderr,
)
return 1
# Extract title from tags if available
title = get_field(result, "title") or file_hash[:16]
if not get_field(result, "title"):
@@ -196,7 +211,7 @@ class Get_Metadata(Cmdlet):
break
except Exception:
pass
# Extract metadata fields
mime_type = metadata.get("mime") or metadata.get("ext", "")
file_size = metadata.get("size")
@@ -224,13 +239,15 @@ class Get_Metadata(Cmdlet):
if len(nums) == 2:
duration_seconds = float(nums[0] * 60 + nums[1])
else:
duration_seconds = float(nums[0] * 3600 + nums[1] * 60 + nums[2])
duration_seconds = float(
nums[0] * 3600 + nums[1] * 60 + nums[2]
)
else:
duration_seconds = None
pages = metadata.get("pages")
url = metadata.get("url") or []
imported_ts = self._extract_imported_ts(metadata)
# Normalize url
if isinstance(url, str):
try:
@@ -239,7 +256,7 @@ class Get_Metadata(Cmdlet):
url = []
if not isinstance(url, list):
url = []
# Build display row
row = self._build_table_row(
title=title,
@@ -253,14 +270,14 @@ class Get_Metadata(Cmdlet):
hash_value=file_hash,
pages=pages,
)
table_title = f"get-metadata: {title}" if title else "get-metadata"
table = ResultTable(table_title).init_command(table_title, "get-metadata", list(args))
self._add_table_body_row(table, row)
ctx.set_last_result_table_overlay(table, [row], row)
ctx.emit(row)
return 0
except KeyError:
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
return 1
+8 -5
View File
@@ -25,7 +25,7 @@ class Get_Note(Cmdlet):
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-query \"hash:<sha256>\"]",
usage='get-note -store <store> [-query "hash:<sha256>"]',
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
@@ -43,7 +43,9 @@ class Get_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +78,10 @@ class Get_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[get_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -144,5 +149,3 @@ class Get_Note(Cmdlet):
CMDLET = Get_Note()
+163 -95
View File
@@ -29,7 +29,7 @@ from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-query \"hash:<sha256>\"]",
usage='get-relationship [-query "hash:<sha256>"]',
alias=[],
arg=[
SharedArgs.QUERY,
@@ -40,6 +40,7 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(_args):
@@ -64,44 +65,63 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
i += 1
override_hash: str | None = sh.parse_single_hash_query(override_query) if override_query else None
override_hash: str | None = (
sh.parse_single_hash_query(override_query) if override_query else None
)
if override_query and not override_hash:
log("get-relationship requires -query \"hash:<sha256>\"", file=sys.stderr)
log('get-relationship requires -query "hash:<sha256>"', file=sys.stderr)
return 1
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
if isinstance(result, list):
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -query \"hash:<sha256>\"", file=sys.stderr)
log(
'get-relationship expects a single item; select one row (e.g. @1) or pass -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
else:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, store}
found_relationships = [] # List of dicts: {hash, type, title, path, store}
source_title = "Unknown"
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
for existing in found_relationships:
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
if (
entry.get("hash")
and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower()
):
return
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
if (
entry.get("path")
and str(existing.get("path", "")).lower() == str(entry["path"]).lower()
):
return
found_relationships.append(entry)
# Store/hash-first subject resolution
store_name: Optional[str] = override_store
if not store_name:
store_name = get_field(result, "store")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if not source_title or source_title == "Unknown":
source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")
source_title = (
get_field(result, "title")
or get_field(result, "name")
or (hash_hex[:16] + "..." if hash_hex else "Unknown")
)
local_db_checked = False
@@ -113,7 +133,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Folder store relationships
# IMPORTANT: only treat the Folder backend as a local DB store.
# Other backends may expose a location() method but are not SQLite folder stores.
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
storage_path = Path(str(backend.location()))
with API_folder_store(storage_path) as db:
local_db_checked = True
@@ -142,7 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not related_hash or related_hash == hash_hex:
continue
entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
entry_type = (
"king" if str(rel_type).lower() == "alt" else str(rel_type)
)
if entry_type == "king":
king_hashes.append(related_hash)
@@ -156,13 +182,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
_add_relationship({
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
}
)
# Reverse relationships (alts pointing to this hash)
try:
@@ -187,13 +215,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
_add_relationship({
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
}
)
# Siblings (alts that share the same king)
for king_hash in king_hashes:
@@ -218,24 +248,30 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
_add_relationship({
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
}
)
except Exception as e:
log(f"Error checking store relationships: {e}", file=sys.stderr)
# If we found local relationships, we can stop or merge with Hydrus?
# For now, if we found local ones, let's show them.
# For now, if we found local ones, let's show them.
# But if the file is also in Hydrus, we might want those too.
# Let's try Hydrus if we have a hash.
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if hash_hex and not local_db_checked:
try:
client = None
@@ -274,7 +310,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if backend_obj is not None and hasattr(backend_obj, "get_tag"):
try:
tag_result = backend_obj.get_tag(h)
tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
tags = (
tag_result[0]
if isinstance(tag_result, tuple) and tag_result
else tag_result
)
if isinstance(tags, list):
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
@@ -308,10 +348,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if isinstance(storage, dict):
for group in storage.values():
if isinstance(group, list):
tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in group if isinstance(x, str)]
)
display = svc_data.get("display_tags")
if isinstance(display, list):
tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in display if isinstance(x, str)]
)
flat = meta.get("tags_flat")
if isinstance(flat, list):
tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
@@ -331,7 +375,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if rel:
file_rels = rel.get("file_relationships", {})
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names.
# For /manage_file_relationships/get_file_relationships, the Hydrus docs define:
@@ -353,16 +397,25 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Some Hydrus responses provide a direct king hash under the 'king' key.
if key == "king":
king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
king_hash = (
normalize_hash(rel_value)
if isinstance(rel_value, str)
else None
)
if king_hash and king_hash != hash_hex:
if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == king_hash
for r in found_relationships
):
found_relationships.append(
{
"hash": king_hash,
"type": "king",
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
}
)
continue
rel_name = rel_map.get(key, f"type-{key}")
@@ -370,29 +423,43 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# The relationship value is typically a list of hashes.
if isinstance(rel_value, list):
for rel_hash in rel_value:
rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
rel_hash_norm = (
normalize_hash(rel_hash)
if isinstance(rel_hash, str)
else None
)
if not rel_hash_norm or rel_hash_norm == hash_hex:
continue
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
}
)
# Defensive: sometimes the API may return a single hash string.
elif isinstance(rel_value, str):
rel_hash_norm = normalize_hash(rel_value)
if rel_hash_norm and rel_hash_norm != hash_hex:
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
}
)
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
@@ -402,66 +469,67 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from rich.panel import Panel
from rich_display import stdout_console
title = source_title or (hash_hex[:16] + "..." if hash_hex else "Item")
stdout_console().print(Panel(f"{title} has no relationships", title="Relationships"))
except Exception:
log("No relationships found.")
return 0
# Display results
table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])
# Sort by type then title
# Custom sort order: King first, then Derivative, then others
def type_sort_key(item):
t = item['type'].lower()
if t == 'king':
t = item["type"].lower()
if t == "king":
return 0
elif t == 'derivative':
elif t == "derivative":
return 1
elif t in {'alternative', 'alternate', 'alt'}:
elif t in {"alternative", "alternate", "alt"}:
return 2
elif t == 'duplicate':
elif t == "duplicate":
return 3
else:
return 4
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
found_relationships.sort(key=lambda x: (type_sort_key(x), x["title"]))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
row.add_column("Type", item["type"].title())
row.add_column("Title", item["title"])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Store", item['store'])
row.add_column("Store", item["store"])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"store": item['store']
"title": item["title"],
"hash": item["hash"],
"file_hash": item["hash"],
"relationship_type": item["type"],
"store": item["store"],
}
# Target is always hash in store/hash-first mode
res_obj["target"] = item['hash']
res_obj["target"] = item["hash"]
pipeline_results.append(res_obj)
# Set selection args
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
table.set_row_selection_args(
i, ["-store", str(item["store"]), "-query", f"hash:{item['hash']}"]
)
ctx.set_last_result_table(table, pipeline_results)
from rich_display import stdout_console
stdout_console().print(table)
return 0
CMDLET.exec = _run
CMDLET.register()
+1532 -1403
View File
File diff suppressed because it is too large Load Diff
+178 -20
View File
@@ -1,8 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Sequence
from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
import sys
import re
from fnmatch import fnmatch
from urllib.parse import urlparse
import pipeline as ctx
from . import _shared as sh
@@ -26,51 +29,199 @@ class UrlItem:
class Get_Url(Cmdlet):
"""Get url associated with files via hash+store."""
"""Get url associated with files via hash+store, or search urls by pattern."""
def __init__(self) -> None:
super().__init__(
name="get-url",
summary="List url associated with a file",
usage="@1 | get-url",
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
],
summary="List url associated with a file, or search urls by pattern",
usage='@1 | get-url OR get-url -url "https://www.youtube.com/watch?v=xx"',
arg=[SharedArgs.QUERY, SharedArgs.STORE, SharedArgs.URL],
detail=[
"- Lists all url associated with file identified by hash+store",
"- Get url for file: @1 | get-url (requires hash+store from result)",
'- Search url across stores: get-url -url "www.google.com" (strips protocol & www prefix)',
'- Wildcard matching: get-url -url "youtube.com*" (matches all youtube.com urls)',
"- Pattern matching: domain matching ignores protocol (https://, http://, ftp://)",
],
exec=self.run,
)
self.register()
@staticmethod
def _normalize_url_for_search(url: str) -> str:
"""Strip protocol and www prefix from URL for searching.
Examples:
https://www.youtube.com/watch?v=xx -> youtube.com/watch?v=xx
http://www.google.com -> google.com
ftp://files.example.com -> files.example.com
"""
url = str(url or "").strip()
# Remove protocol (http://, https://, ftp://, etc.)
url = re.sub(r"^[a-z][a-z0-9+.-]*://", "", url, flags=re.IGNORECASE)
# Remove www. prefix (case-insensitive)
url = re.sub(r"^www\.", "", url, flags=re.IGNORECASE)
return url.lower()
@staticmethod
def _match_url_pattern(url: str, pattern: str) -> bool:
"""Match URL against pattern with wildcard support.
Strips protocol/www from both URL and pattern before matching.
Supports * and ? wildcards.
"""
normalized_url = Get_Url._normalize_url_for_search(url)
normalized_pattern = Get_Url._normalize_url_for_search(pattern)
# Use fnmatch for wildcard matching (* and ?)
return fnmatch(normalized_url, normalized_pattern)
def _search_urls_across_stores(
    self, pattern: str, config: Dict[str, Any]
) -> Tuple[List[UrlItem], List[str]]:
    """Collect every stored URL matching ``pattern`` from all configured stores.

    Each backend is asked for up to 1000 candidate files via
    ``backend.search("*", limit=1000)``; the URLs of every candidate that
    carries a hash are then tested with ``_match_url_pattern``. The search
    is best-effort: a backend that cannot be searched, or a file whose URLs
    cannot be read, is skipped silently.

    Returns:
        Tuple of (matching_items, found_stores), where found_stores names
        the stores that produced at least one match. On an unexpected
        top-level failure the items gathered so far are returned together
        with an empty store list.
    """
    matches: List[UrlItem] = []
    stores_with_hits: Set[str] = set()

    try:
        registry = Store(config)
        backend_names = []
        if hasattr(registry, "list_backends"):
            backend_names = registry.list_backends()

        if not backend_names:
            log("Error: No stores configured", file=sys.stderr)
            return matches, list(stores_with_hits)

        for backend_name in backend_names:
            # Resolve the backend; unknown names are skipped quietly, other
            # lookup failures are reported through debug().
            try:
                backend = registry[backend_name]
            except KeyError:
                continue
            except Exception as exc:
                debug(f"Error searching store '{backend_name}': {exc}", file=sys.stderr)
                continue

            # There is no "list all files" API here, so a broad search is
            # used to obtain candidates. Backends without search support
            # raise and are skipped.
            try:
                candidates = backend.search("*", limit=1000)
                for candidate in candidates or ():
                    digest = candidate.get("hash") or candidate.get("file_hash")
                    if not digest:
                        continue

                    try:
                        for raw_url in backend.get_url(digest) or ():
                            url_text = str(raw_url)
                            if not self._match_url_pattern(url_text, pattern):
                                continue
                            matches.append(
                                UrlItem(
                                    url=url_text,
                                    hash=digest,
                                    store=backend_name,
                                )
                            )
                            stores_with_hits.add(backend_name)
                    except Exception:
                        # URL lookup failed for this file; move on to the next.
                        pass
            except Exception:
                # Backend might not support search; skip
                continue

        return matches, list(stores_with_hits)
    except Exception as exc:
        log(f"Error searching stores: {exc}", file=sys.stderr)
        return matches, []
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
"""Get url for file via hash+store, or search urls by pattern."""
parsed = parse_cmdlet_args(args, self)
# Check if user provided a URL pattern to search for
search_pattern = parsed.get("url")
if search_pattern:
# URL search mode: find all files with matching URLs across stores
items, stores_searched = self._search_urls_across_stores(search_pattern, config)
if not items:
log(f"No urls matching pattern: {search_pattern}", file=sys.stderr)
return 1
# Create result table
from result_table import ResultTable
table = (
ResultTable("URL Search Results", max_columns=3)
.set_preserve_order(True)
.set_table("urls")
.set_value_case("preserve")
)
table.set_source_command("get-url", ["-url", search_pattern])
# Group by store for display
by_store: Dict[str, List[UrlItem]] = {}
for item in items:
if item.store not in by_store:
by_store[item.store] = []
by_store[item.store].append(item)
# Add rows grouped by store
for store_name in sorted(by_store.keys()):
store_items = by_store[store_name]
for idx, item in enumerate(store_items):
row = table.add_row()
if idx == 0:
row.add_column("Store", store_name)
else:
row.add_column("Store", "")
row.add_column("Url", item.url)
# Normalize for display
normalized = self._normalize_url_for_search(item.url)
row.add_column("Hash", item.hash[:16]) # Show first 16 chars
ctx.emit(item)
ctx.set_last_result_table(table if items else None, items, subject=result)
log(f"Found {len(items)} matching url(s) in {len(stores_searched)} store(s)")
return 0
# Original mode: Get URLs for a specific file by hash+store
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Get backend and retrieve url
try:
storage = Store(config)
@@ -111,7 +262,7 @@ class Get_Url(Cmdlet):
log("No url found", file=sys.stderr)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
@@ -120,6 +271,13 @@ class Get_Url(Cmdlet):
return 1
# Import debug function from logger if available
try:
from SYS.logger import debug
except ImportError:
def debug(*args, **kwargs):
pass # Fallback no-op
CMDLET = Get_Url()
+333 -233
View File
File diff suppressed because it is too large Load Diff
+114 -71
View File
@@ -37,7 +37,6 @@ import pipeline as pipeline_context
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
@@ -104,7 +103,6 @@ SITE_SELECTORS: Dict[str, List[str]] = {
}
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@@ -146,6 +144,7 @@ class ScreenshotResult:
# Helper Functions
# ============================================================================
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
@@ -172,7 +171,11 @@ def _tags_from_url(url: str) -> List[str]:
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
host = (
str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "")
.strip()
.lower()
)
except Exception:
parsed = None
host = ""
@@ -300,7 +303,12 @@ def _convert_to_webp(
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
if (
downscale_if_oversize
and isinstance(max_dim, int)
and max_dim > 0
and (w > max_dim or h > max_dim)
):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
@@ -320,7 +328,9 @@ def _convert_to_webp(
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
debug(
f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}"
)
im.save(tmp_path, **save_kwargs)
@@ -332,6 +342,7 @@ def _convert_to_webp(
except Exception:
pass
def _matched_site_selectors(url: str) -> List[str]:
"""Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -355,7 +366,9 @@ def _selectors_for_url(url: str) -> List[str]:
return _matched_site_selectors(url)
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
def _platform_preprocess(
url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000
) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
try:
u = str(url or "").lower()
@@ -373,14 +386,16 @@ def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: i
return clicks
# Dismiss common cookie / consent prompts.
_try_click_buttons([
"Accept all",
"Accept",
"I agree",
"Agree",
"Allow all",
"OK",
])
_try_click_buttons(
[
"Accept all",
"Accept",
"I agree",
"Agree",
"Allow all",
"OK",
]
)
# Some sites need small nudges (best-effort).
if "reddit.com" in u:
@@ -490,7 +505,9 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
return unique_path(path)
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
def _capture(
options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress
) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
try:
@@ -499,16 +516,24 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
try:
current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
current_browser = (
getattr(tool.defaults, "browser", "").lower()
if getattr(tool, "defaults", None) is not None
else ""
)
if current_browser != "chromium":
debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
debug(
f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet"
)
base_cfg = {}
try:
base_cfg = dict(getattr(tool, "_config", {}) or {})
except Exception:
base_cfg = {}
tool_block = dict(base_cfg.get("tool") or {}) if isinstance(base_cfg, dict) else {}
pw_block = dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
pw_block = (
dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
)
pw_block["browser"] = "chromium"
tool_block["playwright"] = pw_block
if isinstance(base_cfg, dict):
@@ -523,7 +548,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
@@ -539,7 +564,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
warnings.append("navigation timeout; capturing current page state")
debug("Navigation timeout; proceeding with current state")
progress.step("loading navigation timeout")
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
@@ -549,7 +574,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
debug("Article element not found; using fallback")
if options.wait_after_load > 0:
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
@@ -591,7 +616,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
for sel in selectors:
try:
debug(f"Trying selector: {sel}")
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
el = page.wait_for_selector(
sel, timeout=max(0, int(options.selector_timeout_ms))
)
except PlaywrightTimeoutError:
debug(f"Selector not found: {sel}")
continue
@@ -604,7 +631,10 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
pass
progress.step("capturing output")
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
el.screenshot(
path=str(destination),
type=("jpeg" if format_name == "jpeg" else None),
)
element_captured = True
debug("Element captured successfully")
break
@@ -645,8 +675,13 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
if any(k in msg for k in ["executable", "not found", "no such file", "cannot find", "install"]):
raise ScreenshotError("Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium") from exc
if any(
k in msg
for k in ["executable", "not found", "no such file", "cannot find", "install"]
):
raise ScreenshotError(
"Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium"
) from exc
raise
except ScreenshotError:
# Re-raise ScreenshotError raised intentionally (do not wrap)
@@ -666,7 +701,9 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
will_target = bool(options.prefer_platform_target) and requested_format != "pdf"
will_convert = requested_format == "webp"
will_archive = bool(options.archive and options.url)
total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
total_steps = (
9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
)
progress.begin_steps(total_steps)
progress.step("loading starting")
@@ -726,19 +763,20 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of url in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'path' field
- List of result objects to screenshot each
- Direct URL as string
Emits PipeObject-formatted results for each screenshot with:
- action: 'cmdlet:screen-shot'
- is_temp: True (screenshots are temporary artifacts)
- parent_id: hash of the original file/URL
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
@@ -761,9 +799,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
if not format_value:
# Default format can be set via config.conf tool block:
@@ -782,7 +820,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
archive_enabled = parsed.get("archive", False)
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_url = [str(url_arg)] if url_arg else []
@@ -801,15 +839,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract url from piped results
if piped_results:
for item in piped_results:
url = (
get_field(item, 'path')
or get_field(item, 'url')
or get_field(item, 'target')
)
url = get_field(item, "path") or get_field(item, "url") or get_field(item, "target")
if url:
url_to_process.append((str(url), item))
if not url_to_process:
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
return 1
@@ -819,9 +853,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
screenshot_dir: Optional[Path] = None
# Primary: Use --storage if provided (highest priority)
if storage_value:
try:
@@ -830,7 +864,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Secondary: Use config-based resolver ONLY if --storage not provided
if screenshot_dir is None and resolve_output_dir is not None:
try:
@@ -838,7 +872,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[screen_shot] Using config resolver: {screenshot_dir}")
except Exception:
pass
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
if screenshot_dir is None and config and config.get("outfile"):
try:
@@ -846,12 +880,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[screen_shot] Using config outfile: {screenshot_dir}")
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
debug(f"[screen_shot] Using default directory: {screenshot_dir}")
ensure_directory(screenshot_dir)
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
@@ -869,21 +903,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
format_name = _normalise_format(format_value)
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
manual_target_selectors = filtered_selectors if filtered_selectors else None
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS url AND CAPTURE SCREENSHOTS
# ========================================================================
def _extract_item_tags(item: Any) -> List[str]:
if item is None:
return []
raw = get_field(item, 'tag')
raw = get_field(item, "tag")
if isinstance(raw, list):
return [str(t) for t in raw if t is not None and str(t).strip()]
if isinstance(raw, str) and raw.strip():
@@ -913,7 +947,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
continue
try:
# Create screenshot with provided options
# Force the Playwright engine to Chromium for the screen-shot cmdlet
@@ -966,28 +1000,32 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
options.prefer_platform_target = True
options.target_selectors = auto_selectors
debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
screenshot_result = _capture_screenshot(options, progress)
# Log results and warnings
debug(f"Screenshot captured to {screenshot_result.path}")
if screenshot_result.archive_url:
debug(f"Archives: {', '.join(screenshot_result.archive_url)}")
for warning in screenshot_result.warnings:
debug(f"Warning: {warning}")
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
with open(screenshot_result.path, "rb") as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
capture_date = ""
try:
capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat()
capture_date = (
datetime.fromtimestamp(screenshot_result.path.stat().st_mtime)
.date()
.isoformat()
)
except Exception:
capture_date = datetime.now().date().isoformat()
@@ -997,7 +1035,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
t
for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
@@ -1007,40 +1046,41 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
pipe_obj = create_pipe_object_result(
source='screenshot',
store='PATH',
source="screenshot",
store="PATH",
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
cmdlet_name="screen-shot",
title=display_title,
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
tag=merged_tags,
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
'url': screenshot_result.url,
'target': str(screenshot_result.path), # Explicit target for add-file
}
"source_url": url,
"archive_url": screenshot_result.archive_url,
"url": screenshot_result.url,
"target": str(screenshot_result.path), # Explicit target for add-file
},
)
# Emit the result so downstream cmdlet (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
# If we created a local progress UI, advance it per completed item.
progress.on_emit(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
exit_code = 1
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
progress.close_local_ui(force_complete=True)
if not all_emitted:
@@ -1051,6 +1091,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
@@ -1058,16 +1100,17 @@ CMDLET = Cmdlet(
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,
CmdletArg(name="format", type="string", description="Output format: webp, png, jpeg, or pdf"),
CmdletArg(
name="format", type="string", description="Output format: webp, png, jpeg, or pdf"
),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.PATH
SharedArgs.PATH,
],
detail=[
"Uses Playwright Chromium engine only. Install Chromium with: python ./scripts/bootstrap.py --playwright-only --browsers chromium",
"PDF output requires headless Chromium (the cmdlet will enforce headless mode for PDF).",
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
]
],
)
CMDLET.exec = _run
+67 -26
View File
@@ -1,4 +1,5 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
@@ -25,37 +26,54 @@ try:
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
"""Search external content providers."""
def __init__(self):
super().__init__(
name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
CmdletArg(
"provider",
type="string",
required=True,
description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
),
CmdletArg(
"query",
type="string",
required=True,
description="Search query (supports provider-specific syntax)",
),
CmdletArg(
"limit", type="int", description="Maximum results to return (default: 50)"
),
CmdletArg(
"open",
type="int",
description="(alldebrid) Open folder/magnet by ID and list its files",
),
],
detail=[
"Search external content providers:",
"- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
" Example: search-provider -provider alldebrid \"*\"",
" Example: search-provider -provider alldebrid -open 123 \"*\"",
' Example: search-provider -provider alldebrid "*"',
' Example: search-provider -provider alldebrid -open 123 "*"',
"- bandcamp: Search for music albums/tracks",
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
' Example: search-provider -provider bandcamp "artist:altrusian grace"',
"- libgen: Search Library Genesis for books",
" Example: search-provider -provider libgen \"python programming\"",
' Example: search-provider -provider libgen "python programming"',
"- loc: Search Library of Congress (Chronicling America)",
" Example: search-provider -provider loc \"lincoln\"",
' Example: search-provider -provider loc "lincoln"',
"- soulseek: Search P2P network for music",
" Example: search-provider -provider soulseek \"pink floyd\"",
' Example: search-provider -provider soulseek "pink floyd"',
"- youtube: Search YouTube for videos",
" Example: search-provider -provider youtube \"tutorial\"",
' Example: search-provider -provider youtube "tutorial"',
"- internetarchive: Search archive.org items (advancedsearch syntax)",
" Example: search-provider -provider internetarchive \"title:(lincoln) AND mediatype:texts\"",
' Example: search-provider -provider internetarchive "title:(lincoln) AND mediatype:texts"',
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
@@ -65,12 +83,12 @@ class Search_Provider(Cmdlet):
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-file",
' search-provider -provider bandcamp "artist:grace" | @1 | download-file',
],
exec=self.run
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute search-provider cmdlet."""
if should_show_help(args):
@@ -81,7 +99,9 @@ class Search_Provider(Cmdlet):
# Dynamic flag variants from cmdlet arg definitions.
flag_registry = self.build_flag_registry()
provider_flags = {f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})}
provider_flags = {
f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})
}
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
open_flags = {f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})}
@@ -106,14 +126,20 @@ class Search_Provider(Cmdlet):
try:
limit = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50", file=sys.stderr)
log(
f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50",
file=sys.stderr,
)
limit = 50
i += 2
elif low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr)
log(
f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
file=sys.stderr,
)
open_id = None
i += 2
elif not token.startswith("-"):
@@ -139,9 +165,9 @@ class Search_Provider(Cmdlet):
status = "" if available else ""
log(f" {status} {name}", file=sys.stderr)
return 1
debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")
# Get provider
provider = get_search_provider(provider_name, config)
if not provider:
@@ -160,6 +186,7 @@ class Search_Provider(Cmdlet):
if library_root:
try:
from API.folder import API_folder_store
db = API_folder_store(library_root)
except Exception:
db = None
@@ -178,6 +205,7 @@ class Search_Provider(Cmdlet):
results_list = []
import result_table
importlib.reload(result_table)
from result_table import ResultTable
@@ -190,7 +218,9 @@ class Search_Provider(Cmdlet):
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
provider_label = (
provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
)
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
@@ -205,7 +235,9 @@ class Search_Provider(Cmdlet):
if provider_lower == "alldebrid":
if open_id is not None:
# Second-stage: show files for selected folder/magnet.
results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
results = provider.search(
query, limit=limit, filters={"view": "files", "magnet_id": open_id}
)
else:
# Default: show folders (magnets) so user can select @N.
results = provider.search(query, limit=limit, filters={"view": "folders"})
@@ -222,7 +254,11 @@ class Search_Provider(Cmdlet):
# Emit results for pipeline
for search_result in results:
item_dict = search_result.to_dict() if hasattr(search_result, "to_dict") else dict(search_result)
item_dict = (
search_result.to_dict()
if hasattr(search_result, "to_dict")
else dict(search_result)
)
# Ensure table field is set (should be by provider, but just in case)
if "table" not in item_dict:
@@ -233,13 +269,18 @@ class Search_Provider(Cmdlet):
# For AllDebrid folder rows, allow @N to open and show files.
try:
if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder":
if (
provider_lower == "alldebrid"
and getattr(search_result, "media_kind", "") == "folder"
):
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"])
table.set_row_selection_args(
row_index, ["-open", str(magnet_id), "-query", "*"]
)
except Exception:
pass
results_list.append(item_dict)
+52 -20
View File
@@ -1,4 +1,5 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
@@ -12,7 +13,16 @@ from SYS.logger import log, debug
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query = (
(
Cmdlet,
CmdletArg,
SharedArgs,
get_field,
should_show_help,
normalize_hash,
first_title_tag,
parse_hash_query,
) = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
@@ -37,9 +47,9 @@ class Search_Store(Cmdlet):
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
SharedArgs.STORE,
SharedArgs.QUERY,
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
@@ -84,20 +94,22 @@ class Search_Store(Cmdlet):
# Ensure we have title field
if "title" not in payload:
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
payload["title"] = (
payload.get("name") or payload.get("target") or payload.get("path") or "Result"
)
# Ensure we have ext field
if "ext" not in payload:
title = str(payload.get("title", ""))
path_obj = Path(title)
if path_obj.suffix:
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip("."))
else:
payload["ext"] = payload.get("ext", "")
# Ensure size_bytes is present for display (already set by search_file())
# result_table will handle formatting it
# Don't create manual columns - let result_table handle display
# This allows the table to respect max_columns and apply consistent formatting
return payload
@@ -134,11 +146,15 @@ class Search_Store(Cmdlet):
raw_title = None
try:
raw_title = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
raw_title = (
ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
)
except Exception:
raw_title = None
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-store", list(args_list))
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title(
"search-store", list(args_list)
)
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
@@ -184,7 +200,7 @@ class Search_Store(Cmdlet):
store_filter = match.group(1).strip() or None
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
query = re.sub(r"\s{2,}", " ", query)
query = query.strip().strip(',')
query = query.strip().strip(",")
if store_filter and not storage_backend:
storage_backend = store_filter
@@ -198,6 +214,7 @@ class Search_Store(Cmdlet):
from API.folder import API_folder_store
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
@@ -212,12 +229,13 @@ class Search_Store(Cmdlet):
"search-store",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
pipe=ctx.get_current_command_text(),
)
results_list = []
import result_table
import importlib
importlib.reload(result_table)
from result_table import ResultTable
@@ -233,6 +251,7 @@ class Search_Store(Cmdlet):
pass
from Store import Store
storage = Store(config=config or {})
from Store._base import Store as BaseStore
@@ -301,7 +320,11 @@ class Search_Store(Cmdlet):
else:
maybe_tags = tag_result
if isinstance(maybe_tags, list):
tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
tags_list = [
str(t).strip()
for t in maybe_tags
if isinstance(t, str) and str(t).strip()
]
except Exception:
tags_list = []
@@ -336,7 +359,9 @@ class Search_Store(Cmdlet):
if size_bytes is None:
size_bytes = meta_obj.get("size_bytes")
try:
size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
size_bytes_int: Optional[int] = (
int(size_bytes) if size_bytes is not None else None
)
except Exception:
size_bytes_int = None
@@ -362,7 +387,7 @@ class Search_Store(Cmdlet):
else:
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
log("No results found", file=sys.stderr)
@@ -373,15 +398,18 @@ class Search_Store(Cmdlet):
except Exception:
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
if backend_to_search:
searched_backends.append(backend_to_search)
target_backend = storage[backend_to_search]
if type(target_backend).search is BaseStore.search:
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
log(
f"Backend '{backend_to_search}' does not support searching",
file=sys.stderr,
)
db.update_worker_status(worker_id, "error")
return 1
debug(f"[search-store] Searching '{backend_to_search}'")
results = target_backend.search(query, limit=limit)
@@ -395,7 +423,9 @@ class Search_Store(Cmdlet):
debug(f"[search-store] Searching '{backend_name}'")
backend_results = backend.search(query, limit=limit - len(all_results))
debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
debug(
f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)"
)
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
@@ -406,6 +436,7 @@ class Search_Store(Cmdlet):
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
@@ -450,15 +481,16 @@ class Search_Store(Cmdlet):
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
try:
db.update_worker_status(worker_id, 'error')
db.update_worker_status(worker_id, "error")
except Exception:
pass
return 1
+93 -63
View File
@@ -1,4 +1,5 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
@@ -30,9 +31,19 @@ CMDLET = Cmdlet(
usage="trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
arg=[
CmdletArg("-path", description="Path to the file (optional if piped)."),
CmdletArg("-input", description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s')."),
CmdletArg("-outdir", description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos)."),
CmdletArg(
"-input",
description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL.",
),
CmdletArg(
"-range",
required=True,
description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s').",
),
CmdletArg(
"-outdir",
description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos).",
),
CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
],
detail=[
@@ -41,7 +52,7 @@ CMDLET = Cmdlet(
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
]
],
)
@@ -71,6 +82,7 @@ def _format_hms(total_seconds: float) -> str:
return "0s"
return "".join(parts)
def _is_url(value: str) -> bool:
try:
p = urlparse(str(value))
@@ -88,7 +100,7 @@ def _parse_time(time_str: str) -> float:
- SS(.sss)
- 1h3m53s (also 1h3m, 3m53s, 53s)
"""
raw = str(time_str or '').strip()
raw = str(time_str or "").strip()
if not raw:
raise ValueError("Empty time")
@@ -97,15 +109,15 @@ def _parse_time(time_str: str) -> float:
r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
raw,
)
if hms and (hms.group('h') or hms.group('m') or hms.group('s')):
hours = float(hms.group('h') or 0)
minutes = float(hms.group('m') or 0)
seconds = float(hms.group('s') or 0)
if hms and (hms.group("h") or hms.group("m") or hms.group("s")):
hours = float(hms.group("h") or 0)
minutes = float(hms.group("m") or 0)
seconds = float(hms.group("s") or 0)
total = hours * 3600 + minutes * 60 + seconds
return float(total)
# Colon-separated
parts = [p.strip() for p in raw.split(':')]
parts = [p.strip() for p in raw.split(":")]
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
if len(parts) == 2:
@@ -117,15 +129,15 @@ def _parse_time(time_str: str) -> float:
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
name = str(name or '').strip()
name = str(name or "").strip()
if not name:
return 'clip'
return "clip"
# Windows-forbidden characters: <>:"/\\|?* plus control chars
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', '_', name)
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', "_", name)
name = re.sub(r"\s+", " ", name).strip()
name = name.rstrip('.')
name = name.rstrip(".")
if not name:
return 'clip'
return "clip"
if len(name) > max_len:
name = name[:max_len].rstrip()
return name
@@ -140,7 +152,9 @@ def _extract_store_name(item: Any) -> Optional[str]:
return None
def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str) -> None:
def _persist_alt_relationship(
*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str
) -> None:
"""Persist directional alt -> king relationship in the given backend."""
try:
store = Store(config)
@@ -155,7 +169,11 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
# Folder-backed local DB
try:
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
from API.folder import API_folder_store
from pathlib import Path
@@ -174,12 +192,15 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
except Exception:
return
def _trim_media(input_source: str, output_path: Path, start_seconds: float, duration_seconds: float) -> bool:
def _trim_media(
input_source: str, output_path: Path, start_seconds: float, duration_seconds: float
) -> bool:
"""Trim media using ffmpeg.
input_source may be a local path or a URL.
"""
ffmpeg_path = shutil.which('ffmpeg')
ffmpeg_path = shutil.which("ffmpeg")
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
@@ -190,38 +211,45 @@ def _trim_media(input_source: str, output_path: Path, start_seconds: float, dura
return False
cmd = [
ffmpeg_path, '-y',
'-ss', str(float(start_seconds)),
'-i', str(input_source),
'-t', str(float(duration_seconds)),
'-c', 'copy',
'-map_metadata', '0',
ffmpeg_path,
"-y",
"-ss",
str(float(start_seconds)),
"-i",
str(input_source),
"-t",
str(float(duration_seconds)),
"-c",
"copy",
"-map_metadata",
"0",
str(output_path),
]
debug(f"Running ffmpeg: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
log(f"ffmpeg error: {result.stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
range_arg = parsed.get("range")
if not range_arg or '-' not in range_arg:
if not range_arg or "-" not in range_arg:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = [s.strip() for s in range_arg.split('-', 1)]
start_str, end_str = [s.strip() for s in range_arg.split("-", 1)]
if not start_str or not end_str:
log("Error: -range must be start-end", file=sys.stderr)
return 1
@@ -237,25 +265,25 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if duration_seconds <= 0:
log(f"Invalid range: start {start_str} >= end {end_str}", file=sys.stderr)
return 1
delete_original = parsed.get("delete", False)
path_arg = parsed.get("path")
input_override = parsed.get("input")
outdir_arg = parsed.get("outdir")
# Collect inputs
inputs = normalize_result_input(result)
# If path arg provided, add it to inputs
if path_arg:
inputs.append({"path": path_arg})
if not inputs:
log("No input files provided.", file=sys.stderr)
return 1
success_count = 0
for item in inputs:
store_name = _extract_store_name(item)
@@ -267,7 +295,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
file_path = item.path
elif isinstance(item, str):
file_path = item
if not file_path and not input_override:
continue
@@ -283,18 +311,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not path_obj or not path_obj.exists():
log(f"File not found: {media_source}", file=sys.stderr)
continue
# Determine output directory
output_dir: Path
if outdir_arg:
output_dir = Path(str(outdir_arg)).expanduser()
elif store_name:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
elif path_obj is not None:
output_dir = path_obj.parent
else:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
try:
@@ -303,7 +333,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
# Determine output filename
output_ext = ''
output_ext = ""
if path_obj is not None:
output_ext = path_obj.suffix
base_name = path_obj.stem
@@ -313,21 +343,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if title:
base_name = _sanitize_filename(str(title))
else:
base_name = time.strftime('%Y%m%d-%H%M%S')
base_name = time.strftime("%Y%m%d-%H%M%S")
if base_name.lower().startswith('clip_'):
if base_name.lower().startswith("clip_"):
base_name = base_name[5:] or base_name
try:
p = urlparse(str(media_source))
last = (p.path or '').split('/')[-1]
if last and '.' in last:
output_ext = '.' + last.split('.')[-1]
last = (p.path or "").split("/")[-1]
if last and "." in last:
output_ext = "." + last.split(".")[-1]
except Exception:
pass
if not output_ext or len(output_ext) > 8:
output_ext = '.mkv'
output_ext = ".mkv"
new_filename = f"clip_{base_name}{output_ext}"
output_path = output_dir / new_filename
@@ -341,30 +371,30 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not candidate.exists():
output_path = candidate
break
# Trim
source_label = (path_obj.name if path_obj is not None else str(media_source))
source_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
log(f"Created clip: {output_path}", file=sys.stderr)
success_count += 1
# Prepare result for pipeline
# 1. Get source hash for relationship
source_hash = None
if isinstance(item, dict):
source_hash = item.get("hash")
elif hasattr(item, "hash"):
source_hash = item.hash
if not source_hash:
if path_obj is not None:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
# 2. Get tag values
# Do not inherit tags from the source (per UX request).
new_tags: list[str] = []
@@ -382,7 +412,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
urls.append(src_u.strip())
except Exception:
pass
# 3. Get title and modify it
title = extract_title_from_result(item)
if not title:
@@ -390,7 +420,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
range_hms = f"{_format_hms(start_seconds)}-{_format_hms(end_seconds)}"
new_title = f"[{range_hms}] - {title}"
# 4. Calculate clip hash
clip_hash = None
try:
@@ -449,20 +479,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"title": new_title,
"tag": new_tags,
"url": urls,
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"store": stored_store,
"relationships": {
# Clip is an ALT of the source; store semantics are directional alt -> king.
# Provide both keys so downstream (e.g. add-file) can persist relationships.
"king": [source_hash] if source_hash else [],
"alt": [clip_hash] if (source_hash and clip_hash) else [],
}
},
}
# Emit result
ctx.emit(result_dict)
# Delete original if requested
if delete_original:
try:
@@ -473,11 +503,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Maybe leave that to user or cleanup cmdlet
except Exception as e:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
failed_label = (path_obj.name if path_obj is not None else str(media_source))
failed_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Failed to trim {failed_label}", file=sys.stderr)
return 0 if success_count > 0 else 1
+19 -4
View File
@@ -20,6 +20,7 @@ def _should_hide_db_args(config: Optional[Dict[str, Any]]) -> bool:
except Exception:
return False
try:
from cmdlet import REGISTRY
except Exception:
@@ -93,7 +94,9 @@ def _normalize_arg(arg: Any) -> Dict[str, Any]:
}
def get_cmdlet_metadata(cmd_name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
def get_cmdlet_metadata(
cmd_name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[Dict[str, Any]]:
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
ensure_registry_loaded()
normalized = cmd_name.replace("-", "_")
@@ -184,12 +187,22 @@ def list_cmdlet_metadata(config: Optional[Dict[str, Any]] = None) -> Dict[str, D
else:
entries.setdefault(
canonical,
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
{
"name": canonical,
"aliases": [],
"usage": "",
"summary": "",
"details": [],
"args": [],
"raw": None,
},
)
return entries
def list_cmdlet_names(include_aliases: bool = True, config: Optional[Dict[str, Any]] = None) -> List[str]:
def list_cmdlet_names(
include_aliases: bool = True, config: Optional[Dict[str, Any]] = None
) -> List[str]:
"""Return sorted cmdlet names (optionally including aliases)."""
ensure_registry_loaded()
entries = list_cmdlet_metadata(config=config)
@@ -223,7 +236,9 @@ def get_cmdlet_arg_flags(cmd_name: str, config: Optional[Dict[str, Any]] = None)
return flags
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str, config: Optional[Dict[str, Any]] = None) -> List[str]:
def get_cmdlet_arg_choices(
cmd_name: str, arg_name: str, config: Optional[Dict[str, Any]] = None
) -> List[str]:
"""Return declared choices for a cmdlet argument."""
meta = get_cmdlet_metadata(cmd_name, config=config)
if not meta:
+4 -1
View File
@@ -32,7 +32,9 @@ def register_native_commands(registry: Dict[str, CmdletFn]) -> None:
"""Import native command modules and register their CMDLET exec functions."""
base_dir = os.path.dirname(__file__)
for filename in os.listdir(base_dir):
if not (filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"):
if not (
filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"
):
continue
mod_name = filename[:-3]
@@ -43,5 +45,6 @@ def register_native_commands(registry: Dict[str, CmdletFn]) -> None:
_register_cmdlet_object(cmdlet_obj, registry)
except Exception as exc:
import sys
print(f"Error importing native command '{mod_name}': {exc}", file=sys.stderr)
continue
+26 -20
View File
@@ -7,32 +7,37 @@ from SYS.logger import log
from result_table import ResultTable
import pipeline as ctx
# JSON file holding the adjective categories: it lives in the "cmdnat"
# folder under the parent of this module's directory.
ADJECTIVE_FILE = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "cmdnat",
    "adjective.json",
)
def _load_adjectives() -> Dict[str, List[str]]:
    """Read the adjective store from ``ADJECTIVE_FILE``.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist or cannot be read/parsed (the error is logged to stderr).
    """
    try:
        if not os.path.exists(ADJECTIVE_FILE):
            return {}
        with open(ADJECTIVE_FILE, "r", encoding="utf-8") as handle:
            loaded = json.load(handle)
    except Exception as e:
        # Best-effort load: report the problem and fall back to "no data".
        log(f"Error loading adjectives: {e}", file=sys.stderr)
        return {}
    return loaded
def _save_adjectives(data: Dict[str, List[str]]) -> bool:
    """Write ``data`` to ``ADJECTIVE_FILE`` as indented JSON.

    Returns:
        True when the file was written, False when writing failed (the
        error is logged to stderr).
    """
    try:
        with open(ADJECTIVE_FILE, "w", encoding="utf-8") as handle:
            json.dump(data, handle, indent=2)
    except Exception as e:
        log(f"Error saving adjectives: {e}", file=sys.stderr)
        return False
    else:
        return True
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
data = _load_adjectives()
# Parse arguments manually first to handle positional args
# We expect: .adjective [category] [tag] [-add] [-delete]
# If no args, list categories
if not args:
table = ResultTable("Adjective Categories")
@@ -41,10 +46,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
row.add_column("#", str(i + 1))
row.add_column("Category", category)
row.add_column("Tag Amount", str(len(tags)))
# Selection expands to: .adjective "Category Name"
table.set_row_selection_args(i, [category])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, list(data.keys()))
ctx.set_current_stage_table(table)
@@ -55,7 +60,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# We have args. First arg is likely category.
category = args[0]
# Check if we are adding a new category (implicit if it doesn't exist)
if category not in data:
# If only category provided, create it
@@ -65,12 +70,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Created new category: {category}")
# If more args, we might be trying to add to a non-existent category
elif "-add" in args:
data[category] = []
# Continue to add logic
data[category] = []
# Continue to add logic
# Handle operations within category
remaining_args = list(args[1:])
# Check for -add flag
if "-add" in remaining_args:
# .adjective category -add tag
@@ -82,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag = remaining_args[add_idx + 1]
elif add_idx > 0:
tag = remaining_args[add_idx - 1]
if tag:
if tag not in data[category]:
data[category].append(tag)
@@ -93,7 +98,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
else:
log("Error: No tag specified to add")
return 1
# Check for -delete flag
elif "-delete" in remaining_args:
# .adjective category -delete tag
@@ -104,7 +109,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag = remaining_args[del_idx + 1]
elif del_idx > 0:
tag = remaining_args[del_idx - 1]
if tag:
if tag in data[category]:
data[category].remove(tag)
@@ -115,7 +120,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
else:
log("Error: No tag specified to delete")
return 1
# List tags in category (Default action if no flags or after modification)
tags = data.get(category, [])
table = ResultTable(f"Tags in '{category}'")
@@ -123,20 +128,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Tag", tag)
# Selection expands to: .adjective "Category" "Tag"
# This allows typing @N -delete to delete it
table.set_row_selection_args(i, [category, tag])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, tags)
ctx.set_current_stage_table(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0
CMDLET = Cmdlet(
name=".adjective",
alias=["adj"],
@@ -148,5 +154,5 @@ CMDLET = Cmdlet(
CmdletArg(name="add", type="flag", description="Add tag"),
CmdletArg(name="delete", type="flag", description="Delete tag"),
],
exec=_run
exec=_run,
)
+43 -38
View File
@@ -9,55 +9,55 @@ CMDLET = Cmdlet(
usage=".config [key] [value]",
arg=[
CmdletArg(
name="key",
description="Configuration key to update (dot-separated)",
required=False
name="key", description="Configuration key to update (dot-separated)", required=False
),
CmdletArg(
name="value",
description="New value for the configuration key",
required=False
)
]
CmdletArg(name="value", description="New value for the configuration key", required=False),
],
)
def flatten_config(
    config: Dict[str, Any], parent_key: str = "", sep: str = "."
) -> List[Dict[str, Any]]:
    """Flatten a nested config dict into one row per leaf value.

    Args:
        config: Mapping to flatten; nested dicts are descended into.
        parent_key: Prefix prepended (with ``sep``) to every emitted key.
        sep: Separator placed between nested key segments.

    Returns:
        A list of row dicts with ``Key`` (joined path), ``Value`` (``str`` of
        the leaf), ``Type`` (leaf type name) and ``_selection_args`` (a
        one-element list holding the joined path), in the mapping's
        iteration order. Keys starting with ``_`` are omitted.
    """

    def _walk(node: Dict[str, Any], prefix: str):
        for name, val in node.items():
            if name.startswith("_"):  # Skip internal keys
                continue
            dotted = name if not prefix else f"{prefix}{sep}{name}"
            if isinstance(val, dict):
                yield from _walk(val, dotted)
                continue
            yield {
                "Key": dotted,
                "Value": str(val),
                "Type": type(val).__name__,
                "_selection_args": [dotted],
            }

    return list(_walk(config, parent_key))
def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
keys = key.split('.')
keys = key.split(".")
d = config
# Navigate to the parent dict
for k in keys[:-1]:
if k not in d or not isinstance(d[k], dict):
d[k] = {}
d = d[k]
last_key = keys[-1]
# Try to preserve type if key exists
if last_key in d:
current_val = d[last_key]
if isinstance(current_val, bool):
if value.lower() in ('true', 'yes', '1', 'on'):
if value.lower() in ("true", "yes", "1", "on"):
d[last_key] = True
elif value.lower() in ('false', 'no', '0', 'off'):
elif value.lower() in ("false", "no", "0", "off"):
d[last_key] = False
else:
# Fallback to boolean conversion of string (usually True for non-empty)
@@ -80,53 +80,57 @@ def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
d[last_key] = value
else:
# New key, try to infer type
if value.lower() in ('true', 'false'):
d[last_key] = (value.lower() == 'true')
if value.lower() in ("true", "false"):
d[last_key] = value.lower() == "true"
elif value.isdigit():
d[last_key] = int(value)
else:
d[last_key] = value
return True
def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
# Reload config to ensure we have the latest on disk
# We don't use the passed 'config' because we want to edit the file
# and 'config' might contain runtime objects (like worker manager)
# But load_config() returns a fresh dict from disk (or cache)
# We should use load_config()
current_config = load_config()
# Parse args
# We handle args manually because of the potential for spaces in values
# and the @ expansion logic in CLI.py passing args
if not args:
# List mode
items = flatten_config(current_config)
# Sort by key
items.sort(key=lambda x: x['Key'])
items.sort(key=lambda x: x["Key"])
# Emit items for ResultTable
import pipeline as ctx
for item in items:
ctx.emit(item)
return 0
# Update mode
key = args[0]
if len(args) < 2:
print(f"Error: Value required for key '{key}'")
return 1
value = " ".join(args[1:])
# Remove quotes if present
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
value = value[1:-1]
try:
set_nested_config(current_config, key, value)
save_config(current_config)
@@ -136,4 +140,5 @@ def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
print(f"Error updating config: {e}")
return 1
CMDLET.exec = _run
+3 -1
View File
@@ -38,7 +38,9 @@ def _find_cmd_metadata(name: str, metadata: Dict[str, Dict[str, Any]]) -> Option
return None
def _render_list(metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str], args: Sequence[str]) -> None:
def _render_list(
metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str], args: Sequence[str]
) -> None:
table = ResultTable("Help")
table.set_source_command(".help", list(args))
+67 -15
View File
@@ -169,6 +169,7 @@ def _extract_file_path(item: Any) -> Optional[str]:
Returns a filesystem path string only if it exists.
"""
def _maybe_local_path(value: Any) -> Optional[str]:
if value is None:
return None
@@ -225,7 +226,11 @@ def _extract_url(item: Any) -> Optional[str]:
if isinstance(item, dict):
for key in ("url", "source_url", "path", "target"):
raw = item.get(key)
if isinstance(raw, str) and raw.strip() and raw.strip().startswith(("http://", "https://")):
if (
isinstance(raw, str)
and raw.strip()
and raw.strip().startswith(("http://", "https://"))
):
return raw.strip()
except Exception:
pass
@@ -264,7 +269,9 @@ def _extract_hash_from_hydrus_file_url(url: str) -> Optional[str]:
return None
def _maybe_download_hydrus_file(item: Any, config: Dict[str, Any], output_dir: Path) -> Optional[str]:
def _maybe_download_hydrus_file(
item: Any, config: Dict[str, Any], output_dir: Path
) -> Optional[str]:
"""If the item looks like a Hydrus file (hash + Hydrus URL), download it using Hydrus access key headers.
This avoids 401 from Hydrus when the URL is /get_files/file?hash=... without headers.
@@ -307,18 +314,24 @@ def _maybe_download_hydrus_file(item: Any, config: Dict[str, Any], output_dir: P
is_hydrus_url = False
if url:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
is_hydrus_url = (parsed.path or "").endswith(
"/get_files/file"
) and _extract_hash_from_hydrus_file_url(url) == file_hash
hydrus_instances: set[str] = set()
try:
store_cfg = (config or {}).get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
hydrus_instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
hydrus_instances = {
str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()
}
except Exception:
hydrus_instances = set()
store_hint = store_name.lower() in {"hydrus", "hydrusnetwork"} or (store_name.lower() in hydrus_instances)
store_hint = store_name.lower() in {"hydrus", "hydrusnetwork"} or (
store_name.lower() in hydrus_instances
)
if not (is_hydrus_url or store_hint):
return None
@@ -402,7 +415,11 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
base_tmp = None
if isinstance(config, dict):
base_tmp = config.get("temp")
output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina")
output_dir = (
Path(str(base_tmp)).expanduser()
if base_tmp
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
output_dir = output_dir / "matrix" / "hydrus"
hydrus_path = _maybe_download_hydrus_file(item, config, output_dir)
if hydrus_path:
@@ -423,11 +440,20 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
base_tmp = None
if isinstance(config, dict):
base_tmp = config.get("temp")
output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina")
output_dir = (
Path(str(base_tmp)).expanduser()
if base_tmp
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
output_dir = output_dir / "matrix"
output_dir.mkdir(parents=True, exist_ok=True)
result = _download_direct_file(url, output_dir, quiet=True)
if result and hasattr(result, "path") and isinstance(result.path, Path) and result.path.exists():
if (
result
and hasattr(result, "path")
and isinstance(result.path, Path)
and result.path.exists()
):
return str(result.path)
except Exception as exc:
debug(f"[matrix] Failed to download URL for upload: {exc}")
@@ -467,6 +493,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
from Provider.matrix import Matrix
try:
provider = Matrix(config)
except Exception as exc:
@@ -490,7 +517,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
file_path = _resolve_upload_path(item, config)
if not file_path:
any_failed = True
log("Matrix upload requires a local file (path) or a direct URL on the selected item", file=sys.stderr)
log(
"Matrix upload requires a local file (path) or a direct URL on the selected item",
file=sys.stderr,
)
continue
media_path = Path(file_path)
@@ -561,6 +591,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
from Provider.matrix import Matrix
try:
provider = Matrix(config)
except Exception as exc:
@@ -581,7 +612,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Diagnostics if a configured filter yields no rows (provider filtered before name lookups for speed).
if not rooms and not _has_flag(args, "-all"):
configured_ids_dbg = [str(v).strip() for v in _parse_config_room_filter_ids(config) if str(v).strip()]
configured_ids_dbg = [
str(v).strip() for v in _parse_config_room_filter_ids(config) if str(v).strip()
]
if configured_ids_dbg:
try:
joined_ids = provider.list_joined_room_ids()
@@ -592,7 +625,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not rooms:
if _parse_config_room_filter_ids(config) and not _has_flag(args, "-all"):
log("No joined rooms matched the configured Matrix room filter (use: .matrix -all)", file=sys.stderr)
log(
"No joined rooms matched the configured Matrix room filter (use: .matrix -all)",
file=sys.stderr,
)
else:
log("No joined rooms found.", file=sys.stderr)
return 0
@@ -630,15 +666,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
return 0
# Cmdlet declaration for ".matrix": binds the name, aliases and accepted
# arguments to this module's _run entry point.
CMDLET = Cmdlet(
    name=".matrix",
    alias=["matrix", "rooms"],
    summary="Send selected items to a Matrix room",
    usage="@N | .matrix",
    arg=[
        # -send: set internally once room(s) have been selected.
        CmdletArg(
            name="send",
            type="bool",
            description="(internal) Send to selected room(s)",
            required=False,
        ),
        # -all: bypass the room filter from config when listing rooms.
        CmdletArg(
            name="all",
            type="bool",
            description="Ignore config room filter and show all joined rooms",
            required=False,
        ),
        # -text: optional caption-like message sent after each upload.
        CmdletArg(
            name="text",
            type="string",
            description="Send a follow-up text message after each upload (caption-like)",
            required=False,
        ),
    ],
    exec=_run,
)
+1 -5
View File
@@ -112,11 +112,7 @@ def _get_active_table(piped_result: Any) -> Optional[Any]:
if piped_result.__class__.__name__ == "ResultTable":
return piped_result
return (
ctx.get_display_table()
or ctx.get_current_stage_table()
or ctx.get_last_result_table()
)
return ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
def _run(piped_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+209 -120
View File
@@ -18,7 +18,6 @@ from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url
_ALLDEBRID_UNLOCK_CACHE: Dict[str, str] = {}
@@ -69,10 +68,14 @@ def _try_enable_mpv_file_logging(mpv_log_path: str, *, attempts: int = 3) -> boo
# Try to set log-file and verbose level.
r1 = _send_ipc_command({"command": ["set_property", "options/log-file", mpv_log_path]})
r2 = _send_ipc_command({"command": ["set_property", "options/msg-level", "all=v"]})
ok = bool((r1 and r1.get("error") == "success") or (r2 and r2.get("error") == "success"))
ok = bool(
(r1 and r1.get("error") == "success") or (r2 and r2.get("error") == "success")
)
# Emit a predictable line so the file isn't empty if logging is active.
_send_ipc_command({"command": ["print-text", f"medeia: log enabled -> {mpv_log_path}"]}, silent=True)
_send_ipc_command(
{"command": ["print-text", f"medeia: log enabled -> {mpv_log_path}"]}, silent=True
)
except Exception:
ok = False
@@ -86,6 +89,7 @@ def _try_enable_mpv_file_logging(mpv_log_path: str, *, attempts: int = 3) -> boo
try:
import time
time.sleep(0.15)
except Exception:
break
@@ -168,6 +172,7 @@ def _ensure_lyric_overlay(mpv: MPV) -> None:
except Exception:
pass
def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]:
"""Send a command to the MPV IPC pipe and return the response."""
try:
@@ -189,11 +194,12 @@ def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]:
return resp.get("data", [])
return []
def _extract_title_from_item(item: Dict[str, Any]) -> str:
"""Extract a clean title from an MPV playlist item, handling memory:// M3U hacks."""
title = item.get("title")
filename = item.get("filename") or ""
# Special handling for memory:// M3U playlists (used to pass titles via IPC)
if "memory://" in filename and "#EXTINF:" in filename:
try:
@@ -204,18 +210,18 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:
extracted_title = match.group(1).strip()
if not title or title == "memory://":
title = extracted_title
# If we still don't have a title, try to find the URL in the M3U content
if not title:
lines = filename.splitlines()
for line in lines:
line = line.strip()
if line and not line.startswith('#') and not line.startswith('memory://'):
if line and not line.startswith("#") and not line.startswith("memory://"):
# Found the URL, use it as title
return line
except Exception:
pass
return title or filename or "Unknown"
@@ -225,7 +231,7 @@ def _extract_target_from_memory_uri(text: str) -> Optional[str]:
return None
for line in text.splitlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('memory://'):
if not line or line.startswith("#") or line.startswith("memory://"):
continue
return line
return None
@@ -233,11 +239,11 @@ def _extract_target_from_memory_uri(text: str) -> Optional[str]:
def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional[str]:
"""Find which Hydrus instance serves a specific file hash.
Args:
hash_str: SHA256 hash (64 hex chars)
file_storage: FileStorage instance with Hydrus backends
Returns:
Instance name (e.g., 'home') or None if not found
"""
@@ -248,7 +254,7 @@ def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional
backend_class = type(backend).__name__
if backend_class != "HydrusNetwork":
continue
try:
# Query metadata to see if this instance has the file
metadata = backend.get_metadata(hash_str)
@@ -257,44 +263,44 @@ def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional
except Exception:
# This instance doesn't have the file or had an error
continue
return None
def _find_hydrus_instance_by_url(url: str, file_storage: Any) -> Optional[str]:
"""Find which Hydrus instance matches a given URL.
Args:
url: Full URL (e.g., http://localhost:45869/get_files/file?hash=...)
file_storage: FileStorage instance with Hydrus backends
Returns:
Instance name (e.g., 'home') or None if not found
"""
from urllib.parse import urlparse
parsed_target = urlparse(url)
target_netloc = parsed_target.netloc.lower()
# Check each Hydrus backend's URL
for backend_name in file_storage.list_backends():
backend = file_storage[backend_name]
backend_class = type(backend).__name__
if backend_class != "HydrusNetwork":
continue
# Get the backend's base URL from its client
try:
backend_url = backend._client.base_url
parsed_backend = urlparse(backend_url)
backend_netloc = parsed_backend.netloc.lower()
# Match by netloc (host:port)
if target_netloc == backend_netloc:
return backend_name
except Exception:
continue
return None
@@ -324,17 +330,19 @@ def _normalize_playlist_path(text: Optional[str]) -> Optional[str]:
pass
# Normalize slashes for Windows paths and lowercase for comparison
real = real.replace('\\', '/')
real = real.replace("\\", "/")
return real.lower()
def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional[Any] = None) -> str:
def _infer_store_from_playlist_item(
item: Dict[str, Any], file_storage: Optional[Any] = None
) -> str:
"""Infer a friendly store label from an MPV playlist entry.
Args:
item: MPV playlist item dict
file_storage: Optional FileStorage instance for querying specific backend instances
Returns:
Store label (e.g., 'home', 'work', 'local', 'youtube', etc.)
"""
@@ -423,7 +431,7 @@ def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional
return hydrus_instance
return "hydrus"
parts = host_stripped.split('.')
parts = host_stripped.split(".")
if len(parts) >= 2:
return parts[-2] or host_stripped
return host_stripped
@@ -440,7 +448,9 @@ def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]:
return f"Hydrus-Client-API-Access-Key: {key}"
def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]:
def _build_ytdl_options(
config: Optional[Dict[str, Any]], hydrus_header: Optional[str]
) -> Optional[str]:
"""Compose ytdl-raw-options string including cookies and optional Hydrus header."""
opts: List[str] = []
cookies_path = None
@@ -454,7 +464,7 @@ def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optiona
cookies_path = None
if cookies_path:
opts.append(f"cookies={cookies_path.replace('\\', '/')}" )
opts.append(f"cookies={cookies_path.replace('\\', '/')}")
else:
opts.append("cookies-from-browser=chrome")
if hydrus_header:
@@ -484,9 +494,11 @@ def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool:
return True
return False
def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
"""Ensure yt-dlp options are set correctly for this session."""
from pathlib import Path
cookies_path = None
try:
from tool.ytdlp import YtDlpTool
@@ -498,7 +510,7 @@ def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
cookies_path = None
if cookies_path:
# Check if file exists and has content (use forward slashes for path checking)
check_path = cookies_path.replace('\\', '/')
check_path = cookies_path.replace("\\", "/")
file_obj = Path(cookies_path)
if file_obj.exists():
file_size = file_obj.stat().st_size
@@ -508,6 +520,7 @@ def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
else:
debug("No cookies file configured")
def _monitor_mpv_logs(duration: float = 3.0) -> None:
"""Monitor MPV logs for a short duration to capture errors."""
try:
@@ -516,16 +529,17 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None:
if not client.connect():
debug("Failed to connect to MPV for log monitoring", file=sys.stderr)
return
# Request log messages
client.send_command({"command": ["request_log_messages", "warn"]})
# On Windows named pipes, avoid blocking the CLI; skip log read entirely
if client.is_windows:
client.disconnect()
return
import time
start_time = time.time()
# Unix sockets already have timeouts set; read until duration expires
@@ -585,14 +599,18 @@ def _tail_text_file(path: str, *, max_lines: int = 120, max_bytes: int = 65536)
return lines
except Exception:
return []
def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]) -> Optional[tuple[str, Optional[str]]]:
def _get_playable_path(
item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]
) -> Optional[tuple[str, Optional[str]]]:
"""Extract a playable path/URL from an item, handling different store types.
Args:
item: Item to extract path from (dict, PipeObject, or string)
file_storage: FileStorage instance for querying backends
config: Config dict for Hydrus URL
Returns:
Tuple of (path, title) or None if no valid path found
"""
@@ -600,7 +618,7 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
title: Optional[str] = None
store: Optional[str] = None
file_hash: Optional[str] = None
# Extract fields from item - prefer a disk path ('path'), but accept 'url' as fallback for providers
if isinstance(item, dict):
path = item.get("path")
@@ -614,13 +632,25 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
title = item.get("title") or item.get("file_title")
store = item.get("store")
file_hash = item.get("hash")
elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"):
elif (
hasattr(item, "path")
or hasattr(item, "url")
or hasattr(item, "source_url")
or hasattr(item, "store")
or hasattr(item, "hash")
):
# Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes
path = getattr(item, "path", None)
if not path:
path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None)
path = (
getattr(item, "url", None)
or getattr(item, "source_url", None)
or getattr(item, "target", None)
)
if not path:
known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get("url")
known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get(
"url"
)
if known and isinstance(known, list):
path = known[0]
title = getattr(item, "title", None) or getattr(item, "file_title", None)
@@ -628,13 +658,13 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
file_hash = getattr(item, "hash", None)
elif isinstance(item, str):
path = item
# Debug: show incoming values
try:
debug(f"_get_playable_path: store={store}, path={path}, hash={file_hash}")
except Exception:
pass
# Treat common placeholders as missing.
if isinstance(path, str) and path.strip().lower() in {"", "n/a", "na", "none"}:
path = None
@@ -644,7 +674,7 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
if isinstance(file_hash, str):
file_hash = file_hash.strip().lower()
# Resolve hash+store into a playable target (file path or URL).
# This is unrelated to MPV's IPC pipe and keeps "pipe" terminology reserved for:
# - MPV IPC pipe (transport)
@@ -663,7 +693,11 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
backend_class = type(backend).__name__
# Folder stores: resolve to an on-disk file path.
if hasattr(backend, "get_file") and callable(getattr(backend, "get_file")) and backend_class == "Folder":
if (
hasattr(backend, "get_file")
and callable(getattr(backend, "get_file"))
and backend_class == "Folder"
):
try:
resolved = backend.get_file(file_hash)
if isinstance(resolved, Path):
@@ -705,11 +739,11 @@ def _queue_items(
start_opts: Optional[Dict[str, Any]] = None,
) -> bool:
"""Queue items to MPV, starting it if necessary.
Args:
items: List of items to queue
clear_first: If True, the first item will replace the current playlist
Returns:
True if MPV was started, False if items were queued via IPC.
"""
@@ -729,11 +763,12 @@ def _queue_items(
hydrus_url = get_hydrus_url(config) if config is not None else None
except Exception:
hydrus_url = None
# Initialize Store registry for path resolution
file_storage = None
try:
from Store import Store
file_storage = Store(config or {})
except Exception as e:
debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr)
@@ -757,7 +792,9 @@ def _queue_items(
# Remove duplicates from playlist starting from the end to keep indices valid
for idx in reversed(dup_indexes):
try:
_send_ipc_command({"command": ["playlist-remove", idx], "request_id": 106}, silent=True)
_send_ipc_command(
{"command": ["playlist-remove", idx], "request_id": 106}, silent=True
)
except Exception:
pass
@@ -774,7 +811,7 @@ def _queue_items(
if not result:
debug(f"_queue_items: item idx={i} produced no playable path")
continue
target, title = result
# If the target is an AllDebrid protected file URL, unlock it to a direct link for MPV.
@@ -812,15 +849,19 @@ def _queue_items(
if base_url:
effective_hydrus_url = str(base_url).rstrip("/")
if key:
effective_hydrus_header = f"Hydrus-Client-API-Access-Key: {str(key).strip()}"
effective_hydrus_header = (
f"Hydrus-Client-API-Access-Key: {str(key).strip()}"
)
effective_ytdl_opts = _build_ytdl_options(config, effective_hydrus_header)
except Exception:
pass
if target:
# If we just have a hydrus hash, build a direct file URL for MPV
if re.fullmatch(r"[0-9a-f]{64}", str(target).strip().lower()) and effective_hydrus_url:
target = f"{effective_hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}"
target = (
f"{effective_hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}"
)
norm_key = _normalize_playlist_path(target) or str(target).strip().lower()
if norm_key in existing_targets or norm_key in new_targets:
@@ -833,13 +874,17 @@ def _queue_items(
# show the raw URL as the playlist title.
if title:
# Sanitize title for M3U (remove newlines)
safe_title = title.replace('\n', ' ').replace('\r', '')
safe_title = title.replace("\n", " ").replace("\r", "")
# Carry the store name for hash URLs so MPV.lyric can resolve the backend.
# This is especially important for local file-server URLs like /get_files/file?hash=...
target_for_m3u = target
try:
if item_store_name and isinstance(target_for_m3u, str) and target_for_m3u.startswith("http"):
if (
item_store_name
and isinstance(target_for_m3u, str)
and target_for_m3u.startswith("http")
):
if "get_files/file" in target_for_m3u and "store=" not in target_for_m3u:
sep = "&" if "?" in target_for_m3u else "?"
target_for_m3u = f"{target_for_m3u}{sep}store={item_store_name}"
@@ -858,10 +903,16 @@ def _queue_items(
# If this is a Hydrus path, set header property and yt-dlp headers before loading.
# Use the real target (not the memory:// wrapper) for detection.
if effective_hydrus_header and _is_hydrus_path(str(target), effective_hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", effective_hydrus_header], "request_id": 199}
header_cmd = {
"command": ["set_property", "http-header-fields", effective_hydrus_header],
"request_id": 199,
}
_send_ipc_command(header_cmd, silent=True)
if effective_ytdl_opts:
ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", effective_ytdl_opts], "request_id": 197}
ytdl_cmd = {
"command": ["set_property", "ytdl-raw-options", effective_ytdl_opts],
"request_id": 197,
}
_send_ipc_command(ytdl_cmd, silent=True)
cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200}
@@ -872,11 +923,13 @@ def _queue_items(
except Exception as e:
debug(f"Exception sending loadfile to MPV: {e}", file=sys.stderr)
resp = None
if resp is None:
# MPV not running (or died)
# Start MPV with remaining items
debug(f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}")
debug(
f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}"
)
_start_mpv(items[i:], config=config, start_opts=start_opts)
return True
elif resp.get("error") == "success":
@@ -884,13 +937,14 @@ def _queue_items(
# would change the MPV window title even if the item isn't currently playing.
debug(f"Queued: {title or target}")
else:
error_msg = str(resp.get('error'))
error_msg = str(resp.get("error"))
debug(f"Failed to queue item: {error_msg}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Manage and play items in the MPV playlist via IPC."""
parsed = parse_cmdlet_args(args, CMDLET)
log_requested = bool(parsed.get("log"))
@@ -912,7 +966,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log_dir = _repo_log_dir()
mpv_log_path = str((log_dir / "medeia-mpv.log").resolve())
except Exception:
mpv_log_path = str((Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".") / "medeia-mpv.log").resolve())
mpv_log_path = str(
(
Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".")
/ "medeia-mpv.log"
).resolve()
)
# Ensure file exists early so we can tail it even if mpv writes later.
try:
Path(mpv_log_path).parent.mkdir(parents=True, exist_ok=True)
@@ -1019,11 +1078,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Emit the current item to pipeline
result_obj = {
'path': filename,
'title': title,
'cmdlet_name': '.pipe',
'source': 'pipe',
'__pipe_index': items.index(current_item),
"path": filename,
"title": title,
"cmdlet_name": ".pipe",
"source": "pipe",
"__pipe_index": items.index(current_item),
}
ctx.emit(result_obj)
@@ -1040,6 +1099,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started, wait a moment for it to be ready, then play first item
import time
time.sleep(0.5)
index_arg = "1" # 1-based index for first item
play_mode = True
@@ -1061,6 +1121,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started; give it a moment, then play first item.
import time
time.sleep(0.5)
index_arg = "1"
else:
@@ -1156,7 +1217,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Queue items (replacing current playlist)
if items:
_queue_items(items, clear_first=True, config=config, start_opts=start_opts)
_queue_items(
items, clear_first=True, config=config, start_opts=start_opts
)
else:
# Empty playlist, just clear
_send_ipc_command({"command": ["playlist-clear"]}, silent=True)
@@ -1180,22 +1243,22 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
table = ResultTable("Saved Playlists")
for i, pl in enumerate(playlists):
item_count = len(pl.get('items', []))
item_count = len(pl.get("items", []))
row = table.add_row()
# row.add_column("ID", str(pl['id'])) # Hidden as per user request
row.add_column("Name", pl['name'])
row.add_column("Name", pl["name"])
row.add_column("Items", str(item_count))
row.add_column("Updated", pl['updated_at'])
row.add_column("Updated", pl["updated_at"])
# Set the playlist items as the result object for this row
# When user selects @N, they get the list of items
# We also set the source command to .pipe -load <ID> so it loads it
table.set_row_selection_args(i, ["-load", str(pl['id'])])
table.set_row_selection_args(i, ["-load", str(pl["id"])])
table.set_source_command(".pipe")
# Register results
ctx.set_last_result_table_overlay(table, [p['items'] for p in playlists])
ctx.set_last_result_table_overlay(table, [p["items"] for p in playlists])
ctx.set_current_stage_table(table)
# Do not print directly here.
@@ -1243,7 +1306,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
playlist_before = _get_playlist(silent=True)
idle_before = None
try:
idle_resp = _send_ipc_command({"command": ["get_property", "idle-active"], "request_id": 111}, silent=True)
idle_resp = _send_ipc_command(
{"command": ["get_property", "idle-active"], "request_id": 111}, silent=True
)
if idle_resp and idle_resp.get("error") == "success":
idle_before = bool(idle_resp.get("data"))
except Exception:
@@ -1262,7 +1327,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Debug: inspect incoming result and attributes
try:
debug(f"pipe._run: received result type={type(result)} repr={repr(result)[:200]}")
debug(f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}")
debug(
f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}"
)
except Exception:
pass
@@ -1294,8 +1361,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_autoplay and after_len > 0:
idx_to_play = min(max(0, before_len), after_len - 1)
play_resp = _send_ipc_command({"command": ["playlist-play-index", idx_to_play], "request_id": 112}, silent=True)
_send_ipc_command({"command": ["set_property", "pause", False], "request_id": 113}, silent=True)
play_resp = _send_ipc_command(
{"command": ["playlist-play-index", idx_to_play], "request_id": 112},
silent=True,
)
_send_ipc_command(
{"command": ["set_property", "pause", False], "request_id": 113},
silent=True,
)
if play_resp and play_resp.get("error") == "success":
debug("Auto-playing piped item")
@@ -1315,6 +1388,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started, retry getting playlist after a brief delay
import time
time.sleep(0.3)
items = _get_playlist(silent=True)
@@ -1324,10 +1398,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
else:
# Do not auto-launch MPV when no action/inputs were provided; avoid surprise startups
no_inputs = not any([
result, url_arg, index_arg, clear_mode, play_mode,
pause_mode, save_mode, load_mode, current_mode, list_mode
])
no_inputs = not any(
[
result,
url_arg,
index_arg,
clear_mode,
play_mode,
pause_mode,
save_mode,
load_mode,
current_mode,
list_mode,
]
)
if no_inputs:
# User invoked `.pipe` with no args: treat this as an intent to open MPV.
@@ -1337,6 +1421,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Re-check playlist after startup; if IPC still isn't ready, just exit cleanly.
try:
import time
time.sleep(0.3)
except Exception:
pass
@@ -1386,18 +1471,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
list_mode = True
index_arg = None
else:
debug(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
debug(
f"Failed to remove item: {resp.get('error') if resp else 'No response'}"
)
return 1
else:
# Play item
if hydrus_header and _is_hydrus_path(filename, hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198}
header_cmd = {
"command": ["set_property", "http-header-fields", hydrus_header],
"request_id": 198,
}
_send_ipc_command(header_cmd, silent=True)
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
# Ensure playback starts (unpause)
unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103}
unpause_cmd = {
"command": ["set_property", "pause", False],
"request_id": 103,
}
_send_ipc_command(unpause_cmd)
debug(f"Playing: {title}")
@@ -1410,7 +1503,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
list_mode = True
index_arg = None
else:
debug(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
debug(
f"Failed to play item: {resp.get('error') if resp else 'No response'}"
)
return 1
except ValueError:
debug(f"Invalid index: {index_arg}")
@@ -1425,6 +1520,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if file_storage is None:
try:
from Store import Store
file_storage = Store(config)
except Exception as e:
debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr)
@@ -1468,7 +1564,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Try to extract hash from filename (e.g., C:\path\1e8c46...a1b2.mp4)
path_obj = Path(real_path)
stem = path_obj.stem # filename without extension
if len(stem) == 64 and all(c in '0123456789abcdef' for c in stem.lower()):
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
file_hash = stem.lower()
# Find which folder store has this file
if file_storage:
@@ -1493,7 +1589,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
hash=file_hash or "unknown",
store=store_name or "unknown",
title=title,
path=real_path
path=real_path,
)
pipe_objects.append(pipe_obj)
@@ -1540,6 +1636,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
break
try:
import time
time.sleep(0.25)
except Exception:
break
@@ -1550,8 +1647,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
print(ln)
else:
print("MPV log (tail): <empty>")
print("Note: On some Windows builds, mpv cannot start writing to --log-file after launch.")
print("If you need full [main2] logs, restart mpv so it starts with --log-file.")
print(
"Note: On some Windows builds, mpv cannot start writing to --log-file after launch."
)
print(
"If you need full [main2] logs, restart mpv so it starts with --log-file."
)
# Also print the helper log tail (this captures Python helper output that won't
# necessarily show up in MPV's own log-file).
@@ -1597,7 +1698,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_opts: Optional[Dict[str, Any]] = None) -> None:
def _start_mpv(
items: List[Any],
config: Optional[Dict[str, Any]] = None,
start_opts: Optional[Dict[str, Any]] = None,
) -> None:
"""Start MPV with a list of items."""
import time as _time_module
@@ -1624,7 +1730,7 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
try:
extra_args: List[str] = [
'--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]',
"--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]",
]
# Optional: borderless window (useful for uosc-like overlay UI without fullscreen).
@@ -1645,7 +1751,7 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
detached=True,
)
debug("Started MPV process")
# Wait for IPC pipe to be ready
if not mpv.wait_for_ipc(retries=20, delay_seconds=0.2):
debug("Timed out waiting for MPV IPC connection", file=sys.stderr)
@@ -1659,15 +1765,16 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
# Queue items via IPC
if items:
_queue_items(items, config=config, start_opts=start_opts)
# Auto-play the first item
import time
time.sleep(0.3) # Give MPV a moment to process the queued items
# Play the first item (index 0) and unpause
play_cmd = {"command": ["playlist-play-index", 0], "request_id": 102}
play_resp = _send_ipc_command(play_cmd, silent=True)
if play_resp and play_resp.get("error") == "success":
# Ensure playback starts (unpause)
unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103}
@@ -1675,7 +1782,7 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
debug("Auto-playing first item")
# Overlay already started above; it will follow track changes automatically.
except Exception as e:
debug(f"Error starting MPV: {e}", file=sys.stderr)
@@ -1688,36 +1795,19 @@ CMDLET = Cmdlet(
arg=[
CmdletArg(
name="index",
type="string", # Changed to string to allow URL detection
type="string", # Changed to string to allow URL detection
description="Index of item to play/clear, or URL to queue",
required=False
),
CmdletArg(
name="url",
type="string",
description="URL to queue",
required=False
required=False,
),
CmdletArg(name="url", type="string", description="URL to queue", required=False),
CmdletArg(
name="clear",
type="flag",
description="Remove the selected item, or clear entire playlist if no index provided"
),
CmdletArg(
name="list",
type="flag",
description="List items (default)"
),
CmdletArg(
name="play",
type="flag",
description="Resume playback"
),
CmdletArg(
name="pause",
type="flag",
description="Pause playback"
description="Remove the selected item, or clear entire playlist if no index provided",
),
CmdletArg(name="list", type="flag", description="List items (default)"),
CmdletArg(name="play", type="flag", description="Resume playback"),
CmdletArg(name="pause", type="flag", description="Pause playback"),
CmdletArg(
name="save",
type="flag",
@@ -1733,19 +1823,18 @@ CMDLET = Cmdlet(
CmdletArg(
name="current",
type="flag",
description="Emit the currently playing item to pipeline for further processing"
description="Emit the currently playing item to pipeline for further processing",
),
CmdletArg(
name="log",
type="flag",
description="Enable pipeable debug output and write an mpv log file"
description="Enable pipeable debug output and write an mpv log file",
),
CmdletArg(
name="borderless",
type="flag",
description="Start mpv with no window border (uosc-like overlay feel without fullscreen)"
description="Start mpv with no window border (uosc-like overlay feel without fullscreen)",
),
],
exec=_run
exec=_run,
)
+284 -262
View File
@@ -14,307 +14,329 @@ _TELEGRAM_PENDING_ITEMS_KEY = "telegram_pending_items"
def _has_flag(args: Sequence[str], flag: str) -> bool:
try:
want = str(flag or "").strip().lower()
if not want:
return False
return any(str(a).strip().lower() == want for a in (args or []))
except Exception:
return False
try:
want = str(flag or "").strip().lower()
if not want:
return False
return any(str(a).strip().lower() == want for a in (args or []))
except Exception:
return False
def _normalize_to_list(value: Any) -> List[Any]:
if value is None:
return []
if isinstance(value, list):
return value
return [value]
if value is None:
return []
if isinstance(value, list):
return value
return [value]
def _extract_chat_id(chat_obj: Any) -> Optional[int]:
try:
if isinstance(chat_obj, dict):
maybe_id = chat_obj.get("id")
if maybe_id is not None:
return int(maybe_id)
extra = chat_obj.get("extra")
if isinstance(extra, dict):
v = extra.get("id")
if v is not None:
return int(v)
v = extra.get("chat_id")
if v is not None:
return int(v)
# PipeObject stores unknown fields in .extra
if hasattr(chat_obj, "extra"):
extra = getattr(chat_obj, "extra")
if isinstance(extra, dict):
v = extra.get("id")
if v is not None:
return int(v)
v = extra.get("chat_id")
if v is not None:
return int(v)
if hasattr(chat_obj, "id"):
maybe_id = getattr(chat_obj, "id")
if maybe_id is not None:
return int(maybe_id)
except Exception:
return None
return None
try:
if isinstance(chat_obj, dict):
maybe_id = chat_obj.get("id")
if maybe_id is not None:
return int(maybe_id)
extra = chat_obj.get("extra")
if isinstance(extra, dict):
v = extra.get("id")
if v is not None:
return int(v)
v = extra.get("chat_id")
if v is not None:
return int(v)
# PipeObject stores unknown fields in .extra
if hasattr(chat_obj, "extra"):
extra = getattr(chat_obj, "extra")
if isinstance(extra, dict):
v = extra.get("id")
if v is not None:
return int(v)
v = extra.get("chat_id")
if v is not None:
return int(v)
if hasattr(chat_obj, "id"):
maybe_id = getattr(chat_obj, "id")
if maybe_id is not None:
return int(maybe_id)
except Exception:
return None
return None
def _extract_chat_username(chat_obj: Any) -> str:
try:
if isinstance(chat_obj, dict):
u = chat_obj.get("username")
return str(u or "").strip()
if hasattr(chat_obj, "extra"):
extra = getattr(chat_obj, "extra")
if isinstance(extra, dict):
u = extra.get("username")
if isinstance(u, str) and u.strip():
return u.strip()
if hasattr(chat_obj, "username"):
return str(getattr(chat_obj, "username") or "").strip()
except Exception:
return ""
return ""
try:
if isinstance(chat_obj, dict):
u = chat_obj.get("username")
return str(u or "").strip()
if hasattr(chat_obj, "extra"):
extra = getattr(chat_obj, "extra")
if isinstance(extra, dict):
u = extra.get("username")
if isinstance(u, str) and u.strip():
return u.strip()
if hasattr(chat_obj, "username"):
return str(getattr(chat_obj, "username") or "").strip()
except Exception:
return ""
return ""
def _extract_title(item: Any) -> str:
try:
if isinstance(item, dict):
return str(item.get("title") or "").strip()
if hasattr(item, "title"):
return str(getattr(item, "title") or "").strip()
# PipeObject stores some fields in .extra
if hasattr(item, "extra"):
extra = getattr(item, "extra")
if isinstance(extra, dict):
v = extra.get("title")
if isinstance(v, str) and v.strip():
return v.strip()
except Exception:
return ""
return ""
try:
if isinstance(item, dict):
return str(item.get("title") or "").strip()
if hasattr(item, "title"):
return str(getattr(item, "title") or "").strip()
# PipeObject stores some fields in .extra
if hasattr(item, "extra"):
extra = getattr(item, "extra")
if isinstance(extra, dict):
v = extra.get("title")
if isinstance(v, str) and v.strip():
return v.strip()
except Exception:
return ""
return ""
def _extract_file_path(item: Any) -> Optional[str]:
def _maybe(value: Any) -> Optional[str]:
if value is None:
return None
text = str(value).strip()
if not text:
return None
if text.startswith("http://") or text.startswith("https://"):
return None
try:
p = Path(text).expanduser()
if p.exists():
return str(p)
except Exception:
return None
return None
def _maybe(value: Any) -> Optional[str]:
if value is None:
return None
text = str(value).strip()
if not text:
return None
if text.startswith("http://") or text.startswith("https://"):
return None
try:
p = Path(text).expanduser()
if p.exists():
return str(p)
except Exception:
return None
return None
try:
if hasattr(item, "path"):
found = _maybe(getattr(item, "path"))
if found:
return found
if hasattr(item, "file_path"):
found = _maybe(getattr(item, "file_path"))
if found:
return found
if isinstance(item, dict):
for key in ("path", "file_path", "target"):
found = _maybe(item.get(key))
if found:
return found
except Exception:
return None
return None
try:
if hasattr(item, "path"):
found = _maybe(getattr(item, "path"))
if found:
return found
if hasattr(item, "file_path"):
found = _maybe(getattr(item, "file_path"))
if found:
return found
if isinstance(item, dict):
for key in ("path", "file_path", "target"):
found = _maybe(item.get(key))
if found:
return found
except Exception:
return None
return None
def _run(_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Run the ``.telegram`` cmdlet: log in, send pending files, or list chats.

    The mode is chosen from ``args`` (checked in this order):

    * ``-login``: call ``provider.ensure_session(prompt=True)`` and report the
      outcome.
    * ``-send``: internal stage. ``_result`` holds the chat(s) selected from
      the Telegram table; the items stored under
      ``_TELEGRAM_PENDING_ITEMS_KEY`` are sent to them as local files, and the
      pending list is cleared afterwards.
    * default: store any piped items as pending, then show a table of chats
      whose type is ``group`` or ``user``. When items are pending, a
      ``.telegram -send`` stage is queued as the pending pipeline tail.

    Args:
        _result: Piped pipeline value (items to send, or the selected chats
            for ``-send``).
        args: Cmdlet arguments.
        config: Configuration passed to the ``Telegram`` provider.

    Returns:
        0 on success (including "no chats to show"), 1 on failure.
    """
    from Provider.telegram import Telegram

    try:
        provider = Telegram(config)
    except Exception as exc:
        log(f"Telegram not available: {exc}", file=sys.stderr)
        return 1

    if _has_flag(args, "-login"):
        ok = False
        try:
            ok = provider.ensure_session(prompt=True)
        except Exception:
            # The reason, if any, is read from provider._last_login_error below.
            ok = False
        if not ok:
            err = getattr(provider, "_last_login_error", None)
            if isinstance(err, str) and err.strip():
                log(f"Telegram login failed: {err}", file=sys.stderr)
            else:
                log("Telegram login failed (no session created).", file=sys.stderr)
            return 1
        log("Telegram login OK (authorized session ready).", file=sys.stderr)
        return 0

    # Internal stage: send previously selected pipeline items to selected chats.
    if _has_flag(args, "-send"):
        # Ensure we don't keep showing the picker table on the send stage.
        try:
            if hasattr(ctx, "set_last_result_table_overlay"):
                ctx.set_last_result_table_overlay(None, None, None)
        except Exception:
            pass
        try:
            if hasattr(ctx, "set_current_stage_table"):
                ctx.set_current_stage_table(None)
        except Exception:
            pass

        # A chat is addressed by numeric id when available, else by username.
        selected_chats = _normalize_to_list(_result)
        chat_ids: List[int] = []
        chat_usernames: List[str] = []
        for c in selected_chats:
            cid = _extract_chat_id(c)
            if cid is not None:
                chat_ids.append(cid)
            else:
                u = _extract_chat_username(c)
                if u:
                    chat_usernames.append(u)

        # De-dupe chat identifiers (preserve order).
        try:
            chat_ids = list(dict.fromkeys([int(x) for x in chat_ids]))
        except Exception:
            pass
        try:
            chat_usernames = list(
                dict.fromkeys([str(u).strip() for u in chat_usernames if str(u).strip()])
            )
        except Exception:
            pass

        if not chat_ids and not chat_usernames:
            log("No Telegram chat selected (use @N on the Telegram table)", file=sys.stderr)
            return 1

        pending_items = ctx.load_value(_TELEGRAM_PENDING_ITEMS_KEY, default=[])
        items = _normalize_to_list(pending_items)
        if not items:
            log("No pending items to send (use: @N | .telegram)", file=sys.stderr)
            return 1

        # Items without a local file are reported and skipped; the stage then
        # exits non-zero even if the remaining files are sent.
        file_jobs: List[Dict[str, str]] = []
        any_failed = False
        for item in items:
            p = _extract_file_path(item)
            if not p:
                any_failed = True
                log(
                    "Telegram send requires local file path(s) on the piped item(s)",
                    file=sys.stderr,
                )
                continue
            title = _extract_title(item)
            file_jobs.append({"path": p, "title": title})

        # De-dupe file paths (preserve order).
        # NOTE(review): keys are lower-cased, so paths that differ only by
        # letter case are treated as the same file.
        try:
            seen: set[str] = set()
            unique_jobs: List[Dict[str, str]] = []
            for j in file_jobs:
                k = str(j.get("path") or "").strip().lower()
                if not k or k in seen:
                    continue
                seen.add(k)
                unique_jobs.append(j)
            file_jobs = unique_jobs
        except Exception:
            pass

        if not file_jobs:
            return 1

        try:
            provider.send_files_to_chats(
                chat_ids=chat_ids, usernames=chat_usernames, files=file_jobs
            )
        except Exception as exc:
            log(f"Telegram send failed: {exc}", file=sys.stderr)
            any_failed = True

        # Clear the pending list whether or not the send succeeded.
        ctx.store_value(_TELEGRAM_PENDING_ITEMS_KEY, [])
        return 1 if any_failed else 0

    selected_items = _normalize_to_list(_result)
    if selected_items:
        ctx.store_value(_TELEGRAM_PENDING_ITEMS_KEY, selected_items)
    else:
        # Avoid stale sends if the user just wants to browse chats.
        try:
            ctx.store_value(_TELEGRAM_PENDING_ITEMS_KEY, [])
        except Exception:
            pass
        try:
            if hasattr(ctx, "clear_pending_pipeline_tail"):
                ctx.clear_pending_pipeline_tail()
        except Exception:
            pass

    # Default: list available chats/channels (requires an existing session or bot_token).
    try:
        rows = provider.list_chats(limit=200)
    except Exception as exc:
        log(f"Failed to list Telegram chats: {exc}", file=sys.stderr)
        return 1

    # Only show dialogs you can typically post to.
    try:
        rows = [
            r for r in (rows or []) if str(r.get("type") or "").strip().lower() in {"group", "user"}
        ]
    except Exception:
        pass

    if not rows:
        log(
            "No Telegram groups/users available (or not logged in). Run: .telegram -login",
            file=sys.stderr,
        )
        return 0

    table = ResultTable("Telegram Chats")
    table.set_table("telegram")
    table.set_source_command(".telegram", [])

    chat_items: List[Dict[str, Any]] = []
    for item in rows:
        row = table.add_row()
        title = str(item.get("title") or "").strip()
        username = str(item.get("username") or "").strip()
        chat_id = item.get("id")
        kind = str(item.get("type") or "").strip()
        # str(None) is the truthy string "None", so convert the id None-safely
        # once; otherwise the "Telegram" fallback title could never be reached.
        chat_id_text = str(chat_id) if chat_id is not None else ""
        row.add_column("Type", kind)
        row.add_column("Title", title)
        row.add_column("Username", username)
        row.add_column("Id", chat_id_text)
        chat_items.append(
            {
                **item,
                "store": "telegram",
                "title": title or username or chat_id_text or "Telegram",
            }
        )

    # Overlay table: ensures @N selection targets this Telegram picker, not a previous table.
    ctx.set_last_result_table_overlay(table, chat_items)
    ctx.set_current_stage_table(table)
    if selected_items:
        # Queue the send stage so it runs once a chat is picked from the table.
        ctx.set_pending_pipeline_tail([[".telegram", "-send"]], ".telegram")
    return 0
# Cmdlet registration for ``.telegram`` (alias ``telegram``); ``_run`` is the
# executor. Each keyword argument is passed exactly once.
CMDLET = Cmdlet(
    name=".telegram",
    alias=["telegram"],
    summary="Telegram login and chat listing",
    usage="@N | .telegram (pick a chat, then send piped files)",
    arg=[
        CmdletArg(
            name="login",
            type="bool",
            description="Create/refresh a Telegram session (prompts)",
            required=False,
        ),
        # Passed by the pipeline tail that _run queues; not meant for direct use.
        CmdletArg(
            name="send",
            type="bool",
            description="(internal) Send to selected chat(s)",
            required=False,
        ),
    ],
    exec=_run,
)
+25 -5
View File
@@ -1,4 +1,5 @@
"""Worker cmdlet: Display workers table in ResultTable format."""
from __future__ import annotations
import sys
@@ -21,11 +22,26 @@ CMDLET = Cmdlet(
summary="Display workers table in result table format.",
usage=".worker [status] [-limit N] [@N]",
arg=[
CmdletArg("status", description="Filter by status: running, completed, error (default: all)", requires_db=True),
CmdletArg("limit", type="integer", description="Limit results (default: 100)", requires_db=True),
CmdletArg("@N", description="Select worker by index (1-based) and display full logs", requires_db=True),
CmdletArg(
"status",
description="Filter by status: running, completed, error (default: all)",
requires_db=True,
),
CmdletArg(
"limit", type="integer", description="Limit results (default: 100)", requires_db=True
),
CmdletArg(
"@N",
description="Select worker by index (1-based) and display full logs",
requires_db=True,
),
CmdletArg("-id", description="Show full logs for a specific worker", requires_db=True),
CmdletArg("-clear", type="flag", description="Remove completed workers from the database", requires_db=True),
CmdletArg(
"-clear",
type="flag",
description="Remove completed workers from the database",
requires_db=True,
),
],
detail=[
"- Shows all background worker tasks and their output",
@@ -188,7 +204,11 @@ def _render_worker_selection(db, selected_items: Any) -> int:
continue
events: List[Dict[str, Any]] = []
try:
events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
events = (
db.get_worker_events(worker.get("worker_id"))
if hasattr(db, "get_worker_events")
else []
)
except Exception:
events = []
_emit_worker_detail(worker, events)
+52 -38
View File
@@ -1,6 +1,5 @@
""" """
"""
"""
from __future__ import annotations
import re
@@ -76,7 +75,9 @@ def _merge_dict_inplace(base: Dict[str, Any], patch: Dict[str, Any]) -> Dict[str
return base
def _apply_conf_block(config: Dict[str, Any], kind: str, subtype: str, block: Dict[str, Any]) -> None:
def _apply_conf_block(
config: Dict[str, Any], kind: str, subtype: str, block: Dict[str, Any]
) -> None:
kind_l = str(kind).strip().lower()
subtype_l = str(subtype).strip().lower()
@@ -290,23 +291,24 @@ def _serialize_conf(config: Dict[str, Any]) -> str:
def _make_cache_key(config_dir: Optional[Path], filename: str, actual_path: Optional[Path]) -> str:
    """Build the cache key for a config file: its resolved path as a string.

    Args:
        config_dir: Directory containing the config file; ``SCRIPT_DIR`` is
            used when this is falsy.
        filename: Config file name, joined onto the base directory.
        actual_path: Already-known config path. When truthy it takes
            precedence and ``config_dir``/``filename`` are ignored.

    Returns:
        ``str(path.resolve())`` for the selected path.
    """
    if actual_path:
        return str(actual_path.resolve())
    base_dir = config_dir or SCRIPT_DIR
    return str((base_dir / filename).resolve())
def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> Optional[Dict[str, Any]]:
def get_hydrus_instance(
config: Dict[str, Any], instance_name: str = "home"
) -> Optional[Dict[str, Any]]:
"""Get a specific Hydrus instance config by name.
Supports multiple formats:
- Current: config["store"]["hydrusnetwork"][instance_name]
- Legacy: config["storage"]["hydrus"][instance_name]
- Old: config["HydrusNetwork"][instance_name]
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
Dict with access key and URL, or None if not found
"""
@@ -323,14 +325,14 @@ def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") ->
def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
"""Get Hydrus access key for an instance.
Config format:
- config["store"]["hydrusnetwork"][name]["API"]
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
Access key string, or None if not found
"""
@@ -344,14 +346,14 @@ def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") -
def get_hydrus_url(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]:
"""Get Hydrus URL for an instance.
Config format:
- config["store"]["hydrusnetwork"][name]["URL"]
Args:
config: Configuration dict
instance_name: Name of the Hydrus instance (default: "home")
Returns:
URL string, or None if not found
"""
@@ -380,15 +382,14 @@ def get_soulseek_password(config: Dict[str, Any]) -> Optional[str]:
return str(val).strip() if val else None
def resolve_output_dir(config: Dict[str, Any]) -> Path:
"""Resolve output directory from config with single source of truth.
Priority:
1. config["temp"] - explicitly set temp/output directory
2. config["outfile"] - fallback to outfile setting
3. Home/Videos - safe user directory fallback
Returns:
Path to output directory
"""
@@ -402,7 +403,7 @@ def resolve_output_dir(config: Dict[str, Any]) -> Path:
return path
except Exception:
pass
# Then try outfile setting
outfile_value = config.get("outfile")
if outfile_value:
@@ -410,22 +411,22 @@ def resolve_output_dir(config: Dict[str, Any]) -> Path:
return Path(str(outfile_value)).expanduser()
except Exception:
pass
# Fallback to user's Videos directory
return Path.home() / "Videos"
def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]:
"""Get local storage path from config.
Supports multiple formats:
- New: config["store"]["folder"]["default"]["path"]
- Old: config["storage"]["local"]["path"]
- Old: config["Local"]["path"]
Args:
config: Configuration dict
Returns:
Path object if found, None otherwise
"""
@@ -439,7 +440,7 @@ def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]:
path_str = default_config.get("path")
if path_str:
return Path(str(path_str)).expanduser()
# Fall back to storage.local.path format
storage = config.get("storage", {})
if isinstance(storage, dict):
@@ -448,14 +449,14 @@ def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]:
path_str = local_config.get("path")
if path_str:
return Path(str(path_str)).expanduser()
# Fall back to old Local format
local_config = config.get("Local", {})
if isinstance(local_config, dict):
path_str = local_config.get("path")
if path_str:
return Path(str(path_str)).expanduser()
return None
@@ -465,11 +466,11 @@ def get_debrid_api_key(config: Dict[str, Any], service: str = "All-debrid") -> O
Config format:
- config["store"]["debrid"][<name>]["api_key"]
where <name> is the store name (e.g. "all-debrid")
Args:
config: Configuration dict
service: Service name (default: "All-debrid")
Returns:
API key string if found, None otherwise
"""
@@ -490,21 +491,21 @@ def get_debrid_api_key(config: Dict[str, Any], service: str = "All-debrid") -> O
if isinstance(entry, str):
return entry.strip() or None
return None
def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[Dict[str, str]]:
"""Get provider credentials (email/password) from config.
Supports both formats:
- New: config["provider"][provider] = {"email": "...", "password": "..."}
- Old: config[provider.capitalize()] = {"email": "...", "password": "..."}
Args:
config: Configuration dict
provider: Provider name (e.g., "openlibrary", "soulseek")
Returns:
Dict with credentials if found, None otherwise
"""
@@ -514,7 +515,7 @@ def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[
creds = provider_config.get(provider.lower(), {})
if isinstance(creds, dict) and creds:
return creds
# Fall back to old format (capitalized key)
old_key_map = {
"openlibrary": "OpenLibrary",
@@ -526,11 +527,13 @@ def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[
creds = config.get(old_key, {})
if isinstance(creds, dict) and creds:
return creds
return None
def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = None) -> Optional[Path]:
def resolve_cookies_path(
config: Dict[str, Any], script_dir: Optional[Path] = None
) -> Optional[Path]:
# Support both legacy top-level `cookies=...` and the modular conf style:
# [tool=ytdlp]
# cookies="C:\\path\\cookies.txt"
@@ -573,6 +576,7 @@ def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = No
return default_path
return None
def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
value = config.get("download_debug_log")
if not value:
@@ -582,7 +586,10 @@ def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
path = Path.cwd() / path
return path
def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
def load_config(
config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME
) -> Dict[str, Any]:
base_dir = config_dir or SCRIPT_DIR
config_path = base_dir / filename
cache_key = _make_cache_key(config_dir, filename, config_path)
@@ -608,7 +615,9 @@ def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFI
return data
def reload_config(
    config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME
) -> Dict[str, Any]:
    """Drop the cached entry for a config file, then load it again.

    Args:
        config_dir: Directory containing the config file (passed through to
            ``_make_cache_key`` and ``load_config``).
        filename: Config file name.

    Returns:
        Whatever ``load_config`` returns for the same arguments.
    """
    # The key is computed without an explicit path, i.e. from config_dir/filename.
    cache_key = _make_cache_key(config_dir, filename, None)
    _CONFIG_CACHE.pop(cache_key, None)
    return load_config(config_dir=config_dir, filename=filename)
@@ -617,6 +626,7 @@ def reload_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CON
def clear_config_cache() -> None:
    """Remove every entry from the module-level ``_CONFIG_CACHE``."""
    _CONFIG_CACHE.clear()
def save_config(
config: Dict[str, Any],
config_dir: Optional[Path] = None,
@@ -626,7 +636,9 @@ def save_config(
config_path = base_dir / filename
if config_path.suffix.lower() != ".conf":
raise RuntimeError(f"Unsupported config format: {config_path.name} (only .conf is supported)")
raise RuntimeError(
f"Unsupported config format: {config_path.name} (only .conf is supported)"
)
try:
config_path.write_text(_serialize_conf(config), encoding="utf-8")
@@ -636,10 +648,12 @@ def save_config(
cache_key = _make_cache_key(config_dir, filename, config_path)
_CONFIG_CACHE[cache_key] = config
def load() -> Dict[str, Any]:
    """Load the configuration by calling ``load_config()`` with its defaults."""
    config = load_config()
    return config
def save(config: Dict[str, Any]) -> None:
    """Write *config* to disk by calling ``save_config()`` with its defaults."""
    save_config(config)
+234
View File
@@ -0,0 +1,234 @@
# get-url Architecture & Flow
## Overview
The enhanced `get-url` command supports two modes:
```
get-url
├── SEARCH MODE (new)
│ └── -url "pattern"
│ ├── Normalize pattern (strip protocol, www)
│ ├── Search all stores
│ ├── Match URLs with wildcards
│ └── Return grouped results
└── ORIGINAL MODE (unchanged)
├── Hash lookup
├── Store lookup
└── Return URLs for file
```
## Flow Diagram: URL Search
```
User Input
v
get-url -url "youtube.com*"
v
_normalize_url_for_search()
│ Strips: https://, http://, www.
│ Result: "youtube.com*" (unchanged, already normalized)
v
_search_urls_across_stores()
├─→ Store 1 (Hydrus)
│ ├─→ search("*", limit=1000)
│ ├─→ get_url(file_hash) for each file
│ └─→ _match_url_pattern() for each URL
├─→ Store 2 (Folder)
│ ├─→ search("*", limit=1000)
│ ├─→ get_url(file_hash) for each file
│ └─→ _match_url_pattern() for each URL
└─→ ...more stores...
Matching URLs:
├─→ https://www.youtube.com/watch?v=123
├─→ http://youtube.com/shorts/abc
└─→ https://youtube.com/playlist?list=xyz
Normalized for matching:
├─→ youtube.com/watch?v=123 ✓ Matches "youtube.com*"
├─→ youtube.com/shorts/abc ✓ Matches "youtube.com*"
└─→ youtube.com/playlist?... ✓ Matches "youtube.com*"
v
Collect UrlItem results
├─→ UrlItem(url="https://www.youtube.com/watch?v=123",
│ hash="abcd1234...", store="hydrus")
├─→ UrlItem(url="http://youtube.com/shorts/abc",
│ hash="efgh5678...", store="folder")
└─→ ...more items...
v
Group by store
├─→ Hydrus
│ ├─→ https://www.youtube.com/watch?v=123
│ └─→ ...
└─→ Folder
├─→ http://youtube.com/shorts/abc
└─→ ...
v
Emit UrlItem objects for piping
v
Return exit code 0 (success)
```
## Code Structure
```
Get_Url (class)
├── __init__()
│ └── Register command with CLI
├── _normalize_url_for_search() [static]
│ └── Strip protocol & www, lowercase
├── _match_url_pattern() [static]
│ └── fnmatch with normalization
├── _search_urls_across_stores() [instance]
│ ├── Iterate stores
│ ├── Search files in store
│ ├── Get URLs for each file
│ ├── Apply pattern matching
│ └── Return (items, stores_found)
└── run() [main execution]
├── Check for -url flag
│ ├── YES: Search mode
│ │ └── _search_urls_across_stores()
│ └── NO: Original mode
│ └── Hash+store lookup
└── Return exit code
```
## Data Flow Examples
### Example 1: Search by Domain
```
Input: get-url -url "www.google.com"
Normalize: "google.com" (www. stripped)
Search Results:
Store "hydrus":
- https://www.google.com ✓
- https://google.com/search?q=hello ✓
- https://google.com/maps ✓
Store "folder":
- http://google.com ✓
- https://google.com/images ✓
Output: 5 matching URLs grouped by store
```
### Example 2: Wildcard Pattern
```
Input: get-url -url "youtube.com/watch*"
Pattern: "youtube.com/watch*"
Search Results:
Store "hydrus":
- https://www.youtube.com/watch?v=123 ✓
- https://youtube.com/watch?list=abc ✓
- https://www.youtube.com/shorts/xyz ✗ (doesn't match /watch*)
Store "folder":
- http://youtube.com/watch?v=456 ✓
Output: 3 matching URLs (watch only, not shorts)
```
### Example 3: Subdomain Wildcard
```
Input: get-url -url "*.example.com*"
Normalize: "*.example.com*" (already normalized)
Search Results:
Store "hydrus":
- https://cdn.example.com/video.mp4 ✓
- https://api.example.com/endpoint ✓
- https://www.example.com ✓
- https://other.org ✗
Output: 3 matching URLs
```
## Integration with Piping
```
# Search → Filter → Add Tag
get-url -url "youtube.com*" | add-tag -tag "video-source"
# Search → Count
get-url -url "reddit.com*" | wc -l
# Search → Export
get-url -url "github.com*" > github_urls.txt
```
## Error Handling Flow
```
get-url -url "pattern"
├─→ No stores configured?
│ └─→ Log "Error: No stores configured"
│ └─→ Return exit code 1
├─→ Store search fails?
│ └─→ Log error, skip store, continue
├─→ No matches found?
│ └─→ Log "No urls matching pattern"
│ └─→ Return exit code 1
└─→ Matches found?
└─→ Return exit code 0
```
## Performance Considerations
1. **Store Iteration**: Loops through all configured stores
2. **File Scanning**: Each store searches up to 1000 files
3. **URL Matching**: Each URL tested against pattern (fnmatch - O(n) per URL)
4. **Memory**: Stores all matching items in memory before display
Optimization opportunities:
- Cache store results
- Limit search scope with --store flag
- Early exit with --limit N
- Pagination support
## Backward Compatibility
Original mode (unchanged):
```
@1 | get-url
└─→ No -url flag
└─→ Use original logic
├─→ Get hash from result
├─→ Get store from result or args
├─→ Call backend.get_url(hash)
└─→ Return URLs for that file
```
All original functionality is preserved; the new `-url` flag is purely additive.
+76
View File
@@ -0,0 +1,76 @@
# Quick Reference: get-url URL Search
## Basic Syntax
```bash
# Search mode (new)
get-url -url "pattern"
# Original mode (unchanged)
@1 | get-url
```
## Examples
### Exact domain match
```bash
get-url -url "google.com"
```
Matches: `https://www.google.com`, `http://google.com/search`, `https://google.com/maps`
### YouTube URL search
```bash
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
```
Normalizes to: `youtube.com/watch?v=xx_88tdwmes`
Matches: Any stored URL for the same video ID, regardless of protocol or `www.` prefix
### Wildcard domain
```bash
get-url -url "youtube.com*"
```
Matches: All YouTube URLs (videos, shorts, playlists, etc.)
### Subdomain wildcard
```bash
get-url -url "*.example.com*"
```
Matches: `cdn.example.com`, `api.example.com`, `www.example.com`
### Specific path pattern
```bash
get-url -url "youtube.com/watch*"
```
Matches: Only YouTube watch URLs (not shorts or playlists)
### Single character wildcard
```bash
get-url -url "example.com/file?.mp4"
```
Matches: `example.com/file1.mp4`, `example.com/fileA.mp4` (not `file12.mp4`)
## How It Works
1. **Normalization**: Strips the protocol prefix (`https://`, `http://`) and the `www.` prefix from both the pattern and every stored URL
2. **Pattern Matching**: Uses `*` and `?` wildcards (case-insensitive)
3. **Search**: Scans all configured stores for matching URLs
4. **Results**: Groups matches by store, shows URL and hash
## Return Values
- Exit code **0** if matches found
- Exit code **1** if no matches or error
## Piping Results
```bash
get-url -url "youtube.com*" | grep -i video
get-url -url "example.com*" | add-tag -tag "external-source"
```
## Common Patterns
| Pattern | Matches | Notes |
|---------|---------|-------|
| `google.com` | Google URLs | Exact domain (after normalization) |
| `youtube.com*` | All YouTube | Wildcard at end |
| `*.example.com*` | Subdomains | Wildcard at start and end |
| `github.com/user*` | User repos | Path pattern |
| `reddit.com/r/*` | Subreddit | Path with wildcard |
+91
View File
@@ -0,0 +1,91 @@
# get-url Enhanced URL Search
The `get-url` command now supports searching for URLs across all stores with automatic protocol and `www` prefix stripping.
## Features
### 1. **Protocol Stripping**
URLs are normalized by removing:
- Protocol prefixes: `https://`, `http://`, `ftp://`, etc.
- `www.` prefix (case-insensitive)
### 2. **Wildcard Matching**
Patterns support standard wildcards:
- `*` - matches any sequence of characters
- `?` - matches any single character
### 3. **Case-Insensitive Matching**
All matching is case-insensitive, for both domains and paths.
## Usage Examples
### Search by full domain
```bash
get-url -url "www.google.com"
# Matches:
# - https://www.google.com
# - http://google.com/search
# - https://google.com/maps
```
### Search with YouTube example
```bash
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
# Becomes: youtube.com/watch?v=xx_88tdwmes
# Matches:
# - https://www.youtube.com/watch?v=xx_88TDWmEs
# - http://youtube.com/watch?v=xx_88TDWmEs
```
### Domain wildcard matching
```bash
get-url -url "youtube.com*"
# Matches any URL starting with youtube.com:
# - https://www.youtube.com/watch?v=123
# - https://youtube.com/shorts/abc
# - http://youtube.com/playlist?list=xyz
```
### Subdomain matching
```bash
get-url -url "*example.com*"
# Matches:
# - https://cdn.example.com/file.mp4
# - https://www.example.com
# - https://api.example.com/endpoint
```
### Specific path matching
```bash
get-url -url "youtube.com/watch*"
# Matches:
# - https://www.youtube.com/watch?v=123
# - http://youtube.com/watch?list=abc
# Does NOT match:
# - https://youtube.com/shorts/abc
```
## Get URLs for Specific File
The original functionality is still supported:
```bash
@1 | get-url
# Requires hash and store from piped result
```
## Output
Results are organized by store and show:
- **Store**: Backend name (hydrus, folder, etc.)
- **Url**: The full matched URL
- **Hash**: First 16 characters of the file hash (for compactness)
## Implementation Details
The search:
1. Iterates through all configured stores
2. Searches for all files in each store (limit 1000 per store)
3. Retrieves URLs for each file
4. Applies pattern matching with normalization
5. Returns results grouped by store
6. Emits `UrlItem` objects for piping to other commands
+1
View File
@@ -5,6 +5,7 @@ packaged distribution the preferred entry is `medeia_macina.cli_entry.main`.
When running from the repository (or in legacy installs) the module will
attempt to import `MedeiaCLI` from the top-level `CLI` module.
"""
import sys
from pathlib import Path
+4 -1
View File
@@ -3,6 +3,7 @@
This wraps the existing `medeia_entry.py` runner so installers can set
entry points to `medeia_macina.cli_entry:main`.
"""
from __future__ import annotations
from typing import Optional, List, Tuple
@@ -196,6 +197,7 @@ def _run_cli(clean_args: List[str]) -> int:
try:
_ensure_repo_root_on_sys_path()
from CLI import MedeiaCLI as _M # type: ignore
MedeiaCLI = _M
except Exception:
raise ImportError(
@@ -261,6 +263,7 @@ def main(argv: Optional[List[str]] = None) -> int:
# exit early to avoid confusing import-time errors later during startup.
try:
from SYS.env_check import ensure_urllib3_ok
try:
ensure_urllib3_ok(exit_on_error=True)
except SystemExit as exc:
@@ -305,4 +308,4 @@ def main(argv: Optional[List[str]] = None) -> int:
if __name__ == "__main__":
raise SystemExit(main())
raise SystemExit(main())
+1004 -802
View File
File diff suppressed because it is too large Load Diff
+98 -50
View File
@@ -33,10 +33,10 @@ from rich.progress import (
@dataclass(slots=True)
class PipeObject:
"""Unified pipeline object for tracking files, metadata, tag values, and relationships through the pipeline.
This is the single source of truth for all result data in the pipeline. Uses the hash+store
canonical pattern for file identification.
Attributes:
hash: SHA-256 hash of the file (canonical identifier)
store: Storage backend name (e.g., 'default', 'hydrus', 'test', 'home')
@@ -53,6 +53,7 @@ class PipeObject:
parent_hash: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
extra: Additional fields not covered above
"""
hash: str
store: str
provider: Optional[str] = None
@@ -72,21 +73,21 @@ class PipeObject:
def add_relationship(self, rel_type: str, rel_hash: str) -> None:
    """Record *rel_hash* under the *rel_type* relationship.

    A missing relationship type starts as an empty list. List-valued entries
    gain the hash once (no duplicates); any non-list entry is overwritten.

    Args:
        rel_type: Relationship type ('king', 'alt', 'related')
        rel_hash: Hash to add to the relationship
    """
    current = self.relationships.setdefault(rel_type, [])
    if not isinstance(current, list):
        # Single value (e.g., king): replace it with the new hash.
        self.relationships[rel_type] = rel_hash
        return
    if rel_hash not in current:
        current.append(rel_hash)
def get_relationships(self) -> Dict[str, Any]:
    """Return a shallow copy of this object's relationships ({} when empty)."""
    if not self.relationships:
        return {}
    return self.relationships.copy()
@@ -114,7 +115,10 @@ class PipeObject:
cmdlet_name = "PipeObject"
try:
import pipeline as ctx
current = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
current = (
ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
)
if current:
cmdlet_name = current
else:
@@ -145,7 +149,7 @@ class PipeObject:
if self.provider:
data["provider"] = self.provider
if self.tag:
data["tag"] = self.tag
if self.title:
@@ -170,7 +174,7 @@ class PipeObject:
data["action"] = self.action
if self.parent_hash:
data["parent_hash"] = self.parent_hash
# Add extra fields
data.update({k: v for k, v in self.extra.items() if v is not None})
return data
@@ -178,22 +182,22 @@ class PipeObject:
class FileRelationshipTracker:
"""Track relationships between files for sidecar creation.
Allows tagging files with their relationships to other files:
- king: The primary/master version of a file
- alt: Alternate versions of the same content
- related: Related files (e.g., screenshots of a book)
"""
def __init__(self) -> None:
self.relationships: Dict[str, Dict[str, Any]] = {}
def register_king(self, file_path: str, file_hash: str) -> None:
"""Register a file as the king (primary) version."""
if file_path not in self.relationships:
self.relationships[file_path] = {}
self.relationships[file_path]["king"] = file_hash
def add_alt(self, file_path: str, alt_hash: str) -> None:
"""Add an alternate version of a file."""
if file_path not in self.relationships:
@@ -202,7 +206,7 @@ class FileRelationshipTracker:
self.relationships[file_path]["alt"] = []
if alt_hash not in self.relationships[file_path]["alt"]:
self.relationships[file_path]["alt"].append(alt_hash)
def add_related(self, file_path: str, related_hash: str) -> None:
"""Add a related file."""
if file_path not in self.relationships:
@@ -211,14 +215,14 @@ class FileRelationshipTracker:
self.relationships[file_path]["related"] = []
if related_hash not in self.relationships[file_path]["related"]:
self.relationships[file_path]["related"].append(related_hash)
def get_relationships(self, file_path: str) -> Optional[Dict[str, Any]]:
"""Get relationships for a file."""
return self.relationships.get(file_path)
def link_files(self, primary_path: str, king_hash: str, *alt_paths: str) -> None:
"""Link files together with primary as king and others as alternates.
Args:
primary_path: Path to the primary file (will be marked as 'king')
king_hash: Hash of the primary file
@@ -231,6 +235,7 @@ class FileRelationshipTracker:
self.add_alt(primary_path, alt_hash)
except Exception as e:
import sys
print(f"Error hashing {alt_path}: {e}", file=sys.stderr)
@@ -245,6 +250,7 @@ def _get_file_hash(filepath: str) -> str:
# ============= Download Module Classes =============
class DownloadError(RuntimeError):
"""Raised when the download or Hydrus import fails."""
@@ -252,9 +258,10 @@ class DownloadError(RuntimeError):
@dataclass(slots=True)
class DownloadOptions:
"""Configuration for downloading media.
Use the add-file cmdlet separately for Hydrus import.
"""
url: str
mode: str # "audio" or "video"
output_dir: Path
@@ -273,13 +280,14 @@ class DownloadOptions:
class SendFunc(Protocol):
"""Protocol for event sender function."""
def __call__(self, event: str, **payload: Any) -> None:
...
def __call__(self, event: str, **payload: Any) -> None: ...
@dataclass(slots=True)
class DownloadMediaResult:
"""Result of a successful media download."""
path: Path
info: Dict[str, Any]
tag: List[str]
@@ -291,6 +299,7 @@ class DownloadMediaResult:
@dataclass(slots=True)
class DebugLogger:
"""Logs events to a JSON debug file for troubleshooting downloads."""
path: Path
file: Optional[TextIO] = None
session_started: bool = False
@@ -383,8 +392,7 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in
if isinstance(value, (list, tuple, set)):
iterable = value if not isinstance(value, set) else list(value)
return [
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen)
for item in iterable
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen) for item in iterable
]
if is_dataclass(value) and not isinstance(value, type):
return _sanitise_for_json(asdict(value), max_depth=max_depth - 1, _seen=_seen)
@@ -393,6 +401,7 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in
return repr(value)
class ProgressBar:
"""Rich progress helper for byte-based transfers.
@@ -419,7 +428,9 @@ class ProgressBar:
# Pipeline-backed transfer task is already registered; update its total if needed.
try:
if total is not None and total > 0:
self._pipeline_ui.update_transfer(label=self._pipeline_label, completed=None, total=int(total))
self._pipeline_ui.update_transfer(
label=self._pipeline_label, completed=None, total=int(total)
)
except Exception:
pass
return
@@ -438,7 +449,10 @@ class ProgressBar:
self._pipeline_ui = ui
self._pipeline_label = str(label or "download")
try:
ui.begin_transfer(label=self._pipeline_label, total=int(total) if isinstance(total, int) and total > 0 else None)
ui.begin_transfer(
label=self._pipeline_label,
total=int(total) if isinstance(total, int) and total > 0 else None,
)
except Exception:
# If pipeline integration fails, fall back to standalone progress.
self._pipeline_ui = None
@@ -503,7 +517,9 @@ class ProgressBar:
if self._progress is None or self._task_id is None:
return
if total is not None and total > 0:
self._progress.update(self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True)
self._progress.update(
self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True
)
else:
self._progress.update(self._task_id, completed=int(downloaded or 0), refresh=True)
@@ -528,21 +544,21 @@ class ProgressBar:
def format_bytes(self, bytes_val: Optional[float]) -> str:
"""Format bytes to human-readable size.
Args:
bytes_val: Number of bytes or None.
Returns:
Formatted string (e.g., "123.4 MB", "1.2 GB").
"""
if bytes_val is None or bytes_val <= 0:
return "?.? B"
for unit in ("B", "KB", "MB", "GB", "TB"):
if bytes_val < 1024:
return f"{bytes_val:.1f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.1f} PB"
# NOTE: rich.Progress handles the visual formatting; format_bytes remains as a general utility.
@@ -555,7 +571,14 @@ class ProgressFileReader:
Progress is written to stderr (so pipelines remain clean).
"""
def __init__(self, fileobj: Any, *, total_bytes: Optional[int], label: str = "upload", min_interval_s: float = 0.25):
def __init__(
self,
fileobj: Any,
*,
total_bytes: Optional[int],
label: str = "upload",
min_interval_s: float = 0.25,
):
self._f = fileobj
self._total = int(total_bytes) if total_bytes not in (None, 0, "") else 0
self._label = str(label or "upload")
@@ -574,7 +597,12 @@ class ProgressFileReader:
now = time.time()
if now - self._last < self._min_interval_s:
return
self._bar.update(downloaded=int(self._read), total=int(self._total), label=str(self._label or "upload"), file=sys.stderr)
self._bar.update(
downloaded=int(self._read),
total=int(self._total),
label=str(self._label or "upload"),
file=sys.stderr,
)
self._last = now
def _finish(self) -> None:
@@ -868,7 +896,13 @@ class PipelineLiveProgress:
return
if self._live is not None:
return
if self._console is None or self._pipe_progress is None or self._subtasks is None or self._transfers is None or self._overall is None:
if (
self._console is None
or self._pipe_progress is None
or self._subtasks is None
or self._transfers is None
or self._overall is None
):
# Not initialized yet; start fresh.
self.start()
return
@@ -1081,7 +1115,9 @@ class PipelineLiveProgress:
except Exception:
pass
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
def update_transfer(
self, *, label: str, completed: Optional[int], total: Optional[int] = None
) -> None:
if not self._enabled:
return
if self._transfers is None:
@@ -1123,7 +1159,9 @@ class PipelineLiveProgress:
return False
return True
def begin_pipe(self, pipe_index: int, *, total_items: int, items_preview: Optional[List[Any]] = None) -> None:
def begin_pipe(
self, pipe_index: int, *, total_items: int, items_preview: Optional[List[Any]] = None
) -> None:
if not self._ensure_pipe(pipe_index):
return
pipe_progress = self._pipe_progress
@@ -1321,7 +1359,11 @@ class PipelineLiveProgress:
if self._overall_task is not None:
completed = 0
try:
completed = sum(1 for i in range(len(self._pipe_labels)) if self._pipe_done[i] >= max(1, self._pipe_totals[i]))
completed = sum(
1
for i in range(len(self._pipe_labels))
if self._pipe_done[i] >= max(1, self._pipe_totals[i])
)
except Exception:
completed = 0
overall.update(
@@ -1330,6 +1372,7 @@ class PipelineLiveProgress:
description=f"Pipeline: {completed}/{len(self._pipe_labels)} pipes completed",
)
class PipelineStageContext:
"""Context information for the current pipeline stage."""
@@ -1343,7 +1386,7 @@ class PipelineStageContext:
):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.is_last_stage = stage_index == total_stages - 1
self.pipe_index = int(pipe_index) if pipe_index is not None else None
self.worker_id = worker_id
self._on_emit = on_emit
@@ -1377,13 +1420,14 @@ class PipelineStageContext:
# Consolidated from result_table.py
# ============================================================================
@dataclass
class InputOption:
"""Represents an interactive input option (cmdlet argument) in a table.
Allows users to select options that translate to cmdlet arguments,
enabling interactive configuration right from the result table.
Example:
# Create an option for location selection
location_opt = InputOption(
@@ -1392,11 +1436,12 @@ class InputOption:
choices=["local", "hydrus", "0x0"],
description="Download destination"
)
# Use in result table
table.add_input_option(location_opt)
selected = table.select_option("location") # Returns user choice
"""
name: str
"""Option name (maps to cmdlet argument)"""
type: str = "string"
@@ -1409,7 +1454,7 @@ class InputOption:
"""Description of what this option does"""
validator: Optional[Callable[[str], bool]] = None
"""Optional validator function: takes value, returns True if valid"""
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
@@ -1424,10 +1469,11 @@ class InputOption:
@dataclass
class TUIResultCard:
"""Represents a result as a UI card with title, metadata, and actions.
Used in hub-ui and TUI contexts to render individual search results
Used in hub-ui and TUI contexts to render individual search results
as grouped components with visual structure.
"""
title: str
subtitle: Optional[str] = None
metadata: Optional[Dict[str, str]] = None
@@ -1436,7 +1482,7 @@ class TUIResultCard:
file_hash: Optional[str] = None
file_size: Optional[str] = None
duration: Optional[str] = None
def __post_init__(self):
"""Initialize default values."""
if self.metadata is None:
@@ -1448,14 +1494,15 @@ class TUIResultCard:
@dataclass
class ResultColumn:
"""Represents a single column in a result table."""
name: str
value: str
width: Optional[int] = None
def __str__(self) -> str:
"""String representation of the column."""
return f"{self.name}: {self.value}"
def to_dict(self) -> Dict[str, str]:
"""Convert to dictionary."""
return {"name": self.name, "value": self.value}
@@ -1464,28 +1511,29 @@ class ResultColumn:
@dataclass
class ResultRow:
"""Represents a single row in a result table."""
columns: List[ResultColumn] = field(default_factory=list)
def add_column(self, name: str, value: Any) -> None:
"""Add a column to this row."""
str_value = str(value) if value is not None else ""
self.columns.append(ResultColumn(name, str_value))
def get_column(self, name: str) -> Optional[str]:
"""Get column value by name."""
for col in self.columns:
if col.name.lower() == name.lower():
return col.value
return None
def to_dict(self) -> List[Dict[str, str]]:
"""Convert to list of column dicts."""
return [col.to_dict() for col in self.columns]
def to_list(self) -> List[tuple[str, str]]:
"""Convert to list of (name, value) tuples."""
return [(col.name, col.value) for col in self.columns]
def __str__(self) -> str:
"""String representation of the row."""
return " | ".join(str(col) for col in self.columns)
return " | ".join(str(col) for col in self.columns)
+708 -681
View File
File diff suppressed because it is too large Load Diff
+387 -298
View File
File diff suppressed because it is too large Load Diff
+109 -58
View File
@@ -88,7 +88,16 @@ def run_platform_bootstrap(repo_root: Path) -> int:
if not exe:
print("PowerShell not found; cannot run bootstrap.ps1", file=sys.stderr)
return 1
cmd = [exe, "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-File", str(ps1), "-Quiet"]
cmd = [
exe,
"-NoProfile",
"-NonInteractive",
"-ExecutionPolicy",
"Bypass",
"-File",
str(ps1),
"-Quiet",
]
elif sh_script.exists():
shell = _find_shell()
if not shell:
@@ -189,15 +198,23 @@ def _install_deno(version: str | None = None) -> int:
def main() -> int:
parser = argparse.ArgumentParser(description="Bootstrap Medios-Macina: install deps and Playwright browsers")
parser.add_argument(
"--skip-deps", action="store_true", help="Skip installing Python dependencies from requirements.txt"
parser = argparse.ArgumentParser(
description="Bootstrap Medios-Macina: install deps and Playwright browsers"
)
parser.add_argument(
"--no-playwright", action="store_true", help="Skip running 'playwright install' (only install packages)"
"--skip-deps",
action="store_true",
help="Skip installing Python dependencies from requirements.txt",
)
parser.add_argument(
"--playwright-only", action="store_true", help="Only run 'playwright install' (skips dependency installation)"
"--no-playwright",
action="store_true",
help="Skip running 'playwright install' (only install packages)",
)
parser.add_argument(
"--playwright-only",
action="store_true",
help="Only run 'playwright install' (skips dependency installation)",
)
parser.add_argument(
"--browsers",
@@ -212,12 +229,23 @@ def main() -> int:
)
deno_group = parser.add_mutually_exclusive_group()
deno_group.add_argument(
"--install-deno", action="store_true", help="Install the Deno runtime (default behavior; kept for explicitness)"
"--install-deno",
action="store_true",
help="Install the Deno runtime (default behavior; kept for explicitness)",
)
deno_group.add_argument(
"--no-deno", action="store_true", help="Skip installing Deno runtime (opt out)"
)
deno_group.add_argument("--no-deno", action="store_true", help="Skip installing Deno runtime (opt out)")
parser.add_argument("--deno-version", type=str, default=None, help="Specific Deno version to install (e.g., v1.34.3)")
parser.add_argument(
"--upgrade-pip", action="store_true", help="Upgrade pip/setuptools/wheel before installing requirements"
"--deno-version",
type=str,
default=None,
help="Specific Deno version to install (e.g., v1.34.3)",
)
parser.add_argument(
"--upgrade-pip",
action="store_true",
help="Upgrade pip/setuptools/wheel before installing requirements",
)
args = parser.parse_args()
@@ -297,12 +325,26 @@ def main() -> int:
if args.upgrade_pip:
print("Upgrading pip, setuptools, and wheel in local venv...")
run([str(venv_python), "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"])
run(
[
str(venv_python),
"-m",
"pip",
"install",
"--upgrade",
"pip",
"setuptools",
"wheel",
]
)
if not args.skip_deps:
req_file = repo_root / "requirements.txt"
if not req_file.exists():
print(f"requirements.txt not found at {req_file}; skipping dependency installation.", file=sys.stderr)
print(
f"requirements.txt not found at {req_file}; skipping dependency installation.",
file=sys.stderr,
)
else:
print(f"Installing Python dependencies into local venv from {req_file}...")
run([str(venv_python), "-m", "pip", "install", "-r", str(req_file)])
@@ -360,7 +402,9 @@ def main() -> int:
site_dir = Path(sp)
break
if site_dir is None:
print("Could not determine venv site-packages directory; skipping .pth fallback")
print(
"Could not determine venv site-packages directory; skipping .pth fallback"
)
else:
pth_file = site_dir / "medeia_repo.pth"
if pth_file.exists():
@@ -378,7 +422,12 @@ def main() -> int:
# Re-check whether CLI can be imported now
rc2 = subprocess.run(
[str(venv_python), "-c", "import importlib; importlib.import_module('CLI')"], check=False
[
str(venv_python),
"-c",
"import importlib; importlib.import_module('CLI')",
],
check=False,
)
if rc2.returncode == 0:
print("Top-level 'CLI' import works after adding .pth")
@@ -452,8 +501,8 @@ python -m medeia_macina.cli_entry @args
"@echo off\r\n"
"set SCRIPT_DIR=%~dp0\r\n"
"set PATH=%SCRIPT_DIR%\\.venv\\Scripts;%PATH%\r\n"
"if exist \"%SCRIPT_DIR%\\.venv\\Scripts\\python.exe\" \"%SCRIPT_DIR%\\.venv\\Scripts\\python.exe\" -m medeia_macina.cli_entry %*\r\n"
"if exist \"%SCRIPT_DIR%\\CLI.py\" python \"%SCRIPT_DIR%\\CLI.py\" %*\r\n"
'if exist "%SCRIPT_DIR%\\.venv\\Scripts\\python.exe" "%SCRIPT_DIR%\\.venv\\Scripts\\python.exe" -m medeia_macina.cli_entry %*\r\n'
'if exist "%SCRIPT_DIR%\\CLI.py" python "%SCRIPT_DIR%\\CLI.py" %*\r\n'
"python -m medeia_macina.cli_entry %*\r\n"
)
try:
@@ -478,12 +527,12 @@ python -m medeia_macina.cli_entry @args
cmd_text = (
f"@echo off\r\n"
f"set REPO={repo}\r\n"
f"if exist \"%REPO%\\.venv\\Scripts\\mm.exe\" \"%REPO%\\.venv\\Scripts\\mm.exe\" %*\r\n"
f'if exist "%REPO%\\.venv\\Scripts\\mm.exe" "%REPO%\\.venv\\Scripts\\mm.exe" %*\r\n'
f"if defined MM_DEBUG (\r\n"
f" echo MM_DEBUG: REPO=%REPO%\r\n"
f" if exist \"%REPO%\\.venv\\Scripts\\python.exe\" \"%REPO%\\.venv\\Scripts\\python.exe\" -c \"import sys,importlib,importlib.util; print('sys.executable:', sys.executable); print('sys.path (first 8):', sys.path[:8]);\" \r\n"
f' if exist "%REPO%\\.venv\\Scripts\\python.exe" "%REPO%\\.venv\\Scripts\\python.exe" -c "import sys,importlib,importlib.util; print(\'sys.executable:\', sys.executable); print(\'sys.path (first 8):\', sys.path[:8]);" \r\n'
f")\r\n"
f"if exist \"%REPO%\\.venv\\Scripts\\python.exe\" \"%REPO%\\.venv\\Scripts\\python.exe\" -m medeia_macina.cli_entry %*\r\n"
f'if exist "%REPO%\\.venv\\Scripts\\python.exe" "%REPO%\\.venv\\Scripts\\python.exe" -m medeia_macina.cli_entry %*\r\n'
f"python -m medeia_macina.cli_entry %*\r\n"
)
if mm_cmd.exists():
@@ -495,14 +544,14 @@ python -m medeia_macina.cli_entry @args
mm_ps1 = user_bin / "mm.ps1"
ps1_text = (
"Param([Parameter(ValueFromRemainingArguments=$true)] $args)\n"
f"$repo = \"{repo}\"\n"
f'$repo = "{repo}"\n'
"$venv = Join-Path $repo '.venv'\n"
"$exe = Join-Path $venv 'Scripts\\mm.exe'\n"
"if (Test-Path $exe) { & $exe @args; exit $LASTEXITCODE }\n"
"$py = Join-Path $venv 'Scripts\\python.exe'\n"
"if (Test-Path $py) {\n"
" if ($env:MM_DEBUG) {\n"
" Write-Host \"MM_DEBUG: diagnostics\" -ForegroundColor Yellow\n"
' Write-Host "MM_DEBUG: diagnostics" -ForegroundColor Yellow\n'
" & $py -c \"import sys,importlib,importlib.util,traceback; print('sys.executable:', sys.executable); print('sys.path (first 8):', sys.path[:8]);\"\n"
" }\n"
" & $py -m medeia_macina.cli_entry @args; exit $LASTEXITCODE\n"
@@ -524,7 +573,9 @@ python -m medeia_macina.cli_entry @args
"$cur = [Environment]::GetEnvironmentVariable('PATH','User');"
"if ($cur -notlike \"*$bin*\") {[Environment]::SetEnvironmentVariable('PATH', ($bin + ';' + ($cur -ne $null ? $cur : '')), 'User')}"
).format(bin=str_bin.replace("\\", "\\\\"))
subprocess.run(["powershell", "-NoProfile", "-Command", ps_cmd], check=False)
subprocess.run(
["powershell", "-NoProfile", "-Command", ps_cmd], check=False
)
except Exception:
pass
@@ -539,64 +590,64 @@ python -m medeia_macina.cli_entry @args
sh_text = (
"#!/usr/bin/env bash\n"
"set -e\n"
f"REPO=\"{repo}\"\n"
f'REPO="{repo}"\n'
"# Prefer git top-level when available to avoid embedding a parent path.\n"
"if command -v git >/dev/null 2>&1; then\n"
" gitroot=$(git -C \"$REPO\" rev-parse --show-toplevel 2>/dev/null || true)\n"
" if [ -n \"$gitroot\" ]; then\n"
" REPO=\"$gitroot\"\n"
' gitroot=$(git -C "$REPO" rev-parse --show-toplevel 2>/dev/null || true)\n'
' if [ -n "$gitroot" ]; then\n'
' REPO="$gitroot"\n'
" fi\n"
"fi\n"
"# If git not available or didn't resolve, walk up from CWD to find a project root.\n"
"if [ ! -f \"$REPO/CLI.py\" ] && [ ! -f \"$REPO/pyproject.toml\" ]; then\n"
" CUR=\"$(pwd -P)\"\n"
" while [ \"$CUR\" != \"/\" ] && [ \"$CUR\" != \"\" ]; do\n"
" if [ -f \"$CUR/CLI.py\" ] || [ -f \"$CUR/pyproject.toml\" ]; then\n"
" REPO=\"$CUR\"\n"
'if [ ! -f "$REPO/CLI.py" ] && [ ! -f "$REPO/pyproject.toml" ]; then\n'
' CUR="$(pwd -P)"\n'
' while [ "$CUR" != "/" ] && [ "$CUR" != "" ]; do\n'
' if [ -f "$CUR/CLI.py" ] || [ -f "$CUR/pyproject.toml" ]; then\n'
' REPO="$CUR"\n'
" break\n"
" fi\n"
" CUR=\"$(dirname \"$CUR\")\"\n"
' CUR="$(dirname "$CUR")"\n'
" done\n"
"fi\n"
"VENV=\"$REPO/.venv\"\n"
'VENV="$REPO/.venv"\n'
"# Debug mode: set MM_DEBUG=1 to print repository, venv, and import diagnostics\n"
"if [ -n \"${MM_DEBUG:-}\" ]; then\n"
" echo \"MM_DEBUG: diagnostics\" >&2\n"
" echo \"Resolved REPO: $REPO\" >&2\n"
" echo \"Resolved VENV: $VENV\" >&2\n"
" echo \"VENV exists: $( [ -d \"$VENV\" ] && echo yes || echo no )\" >&2\n"
" echo \"Candidates:\" >&2\n"
" echo \" VENV/bin/mm: $( [ -x \"$VENV/bin/mm\" ] && echo yes || echo no )\" >&2\n"
" echo \" VENV/bin/python3: $( [ -x \"$VENV/bin/python3\" ] && echo yes || echo no )\" >&2\n"
" echo \" VENV/bin/python: $( [ -x \"$VENV/bin/python\" ] && echo yes || echo no )\" >&2\n"
" echo \" system python3: $(command -v python3 || echo none)\" >&2\n"
" echo \" system python: $(command -v python || echo none)\" >&2\n"
" for pycmd in \"$VENV/bin/python3\" \"$VENV/bin/python\" \"$(command -v python3 2>/dev/null)\" \"$(command -v python 2>/dev/null)\"; do\n"
" if [ -n \"$pycmd\" ] && [ -x \"$pycmd\" ]; then\n"
" echo \"---- Testing with: $pycmd ----\" >&2\n"
'if [ -n "${MM_DEBUG:-}" ]; then\n'
' echo "MM_DEBUG: diagnostics" >&2\n'
' echo "Resolved REPO: $REPO" >&2\n'
' echo "Resolved VENV: $VENV" >&2\n'
' echo "VENV exists: $( [ -d "$VENV" ] && echo yes || echo no )" >&2\n'
' echo "Candidates:" >&2\n'
' echo " VENV/bin/mm: $( [ -x "$VENV/bin/mm" ] && echo yes || echo no )" >&2\n'
' echo " VENV/bin/python3: $( [ -x "$VENV/bin/python3" ] && echo yes || echo no )" >&2\n'
' echo " VENV/bin/python: $( [ -x "$VENV/bin/python" ] && echo yes || echo no )" >&2\n'
' echo " system python3: $(command -v python3 || echo none)" >&2\n'
' echo " system python: $(command -v python || echo none)" >&2\n'
' for pycmd in "$VENV/bin/python3" "$VENV/bin/python" "$(command -v python3 2>/dev/null)" "$(command -v python 2>/dev/null)"; do\n'
' if [ -n "$pycmd" ] && [ -x "$pycmd" ]; then\n'
' echo "---- Testing with: $pycmd ----" >&2\n'
" $pycmd - <<'PY'\nimport sys, importlib, traceback, importlib.util\nprint('sys.executable:', sys.executable)\nprint('sys.path (first 8):', sys.path[:8])\nfor mod in ('CLI','medeia_macina','medeia_macina.cli_entry'):\n try:\n spec = importlib.util.find_spec(mod)\n print(mod, 'spec:', spec)\n if spec:\n m = importlib.import_module(mod)\n print(mod, 'loaded at', getattr(m, '__file__', None))\n except Exception:\n print(mod, 'import failed')\n traceback.print_exc()\nPY\n"
" fi\n"
" done\n"
" echo \"MM_DEBUG: end diagnostics\" >&2\n"
' echo "MM_DEBUG: end diagnostics" >&2\n'
"fi\n"
"# Packaged console script in the venv if available\n"
"if [ -x \"$VENV/bin/mm\" ]; then\n"
" exec \"$VENV/bin/mm\" \"$@\"\n"
'if [ -x "$VENV/bin/mm" ]; then\n'
' exec "$VENV/bin/mm" "$@"\n'
"fi\n"
"# Prefer venv's python3, then venv's python\n"
"if [ -x \"$VENV/bin/python3\" ]; then\n"
" exec \"$VENV/bin/python3\" -m medeia_macina.cli_entry \"$@\"\n"
'if [ -x "$VENV/bin/python3" ]; then\n'
' exec "$VENV/bin/python3" -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"if [ -x \"$VENV/bin/python\" ]; then\n"
" exec \"$VENV/bin/python\" -m medeia_macina.cli_entry \"$@\"\n"
'if [ -x "$VENV/bin/python" ]; then\n'
' exec "$VENV/bin/python" -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"# Fallback to system python3, then system python (only if it's Python 3)\n"
"if command -v python3 >/dev/null 2>&1; then\n"
" exec python3 -m medeia_macina.cli_entry \"$@\"\n"
' exec python3 -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"if command -v python >/dev/null 2>&1; then\n"
" if python -c 'import sys; sys.exit(0 if sys.version_info[0] >= 3 else 1)'; then\n"
" exec python -m medeia_macina.cli_entry \"$@\"\n"
' exec python -m medeia_macina.cli_entry "$@"\n'
" fi\n"
"fi\n"
"echo 'Error: no suitable Python 3 interpreter found. Please install Python 3 or use the venv.' >&2\n"
@@ -614,8 +665,8 @@ python -m medeia_macina.cli_entry @args
profile = home / ".profile"
snippet = (
"# Added by Medeia-Macina setup: ensure user local bin is on PATH\n"
"if [ -d \"$HOME/.local/bin\" ] && [[ \":$PATH:\" != *\":$HOME/.local/bin:\"* ]]; then\n"
" PATH=\"$HOME/.local/bin:$PATH\"\n"
'if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then\n'
' PATH="$HOME/.local/bin:$PATH"\n'
"fi\n"
)
try:
+880 -50
View File
File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More