df
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-29 17:05:03 -08:00
parent 226de9316a
commit c019c00aed
104 changed files with 19669 additions and 12954 deletions
+12
View File
@@ -0,0 +1,12 @@
{
  "editor.formatOnSave": true,
  "editor.defaultFormatter": "ms-python.black-formatter",
  "black-formatter.args": ["--line-length", "88"],
  "[python]": {
    "editor.defaultFormatter": "ms-python.black-formatter",
    "editor.formatOnSave": true
  }
}
+16 -18
View File
@@ -182,12 +182,7 @@ class HTTPClient:
headers=headers,
)
def request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
def request(self, method: str, url: str, **kwargs) -> httpx.Response:
"""
Make a generic HTTP request.
@@ -260,7 +255,7 @@ class HTTPClient:
url: str,
raise_for_status: bool = True,
log_http_errors: bool = True,
**kwargs
**kwargs,
) -> httpx.Response:
"""
Make an HTTP request with automatic retries.
@@ -312,12 +307,16 @@ class HTTPClient:
response_text = e.response.text[:200]
except:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
logger.warning(
f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
)
if attempt < self.retries - 1:
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
if attempt < self.retries - 1:
continue
@@ -485,12 +484,7 @@ class AsyncHTTPClient:
return path
async def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
"""
Make an async HTTP request with automatic retries.
@@ -540,13 +534,17 @@ class AsyncHTTPClient:
response_text = e.response.text[:200]
except:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
logger.warning(
f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
)
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
@@ -590,7 +588,7 @@ def download(
url: str,
file_path: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
**kwargs
**kwargs,
) -> Path:
"""Quick file download without context manager."""
with HTTPClient() as client:
+299 -235
View File
@@ -1,4 +1,5 @@
"""Hydrus API helpers and export utilities."""
from __future__ import annotations
import base64
@@ -70,7 +71,7 @@ class HydrusNetwork:
url: str
access_key: str = ""
timeout: float = 60.0
timeout: float = 9.0
instance_name: str = "" # Optional store name (e.g., 'home') for namespaced logs
scheme: str = field(init=False)
@@ -127,7 +128,9 @@ class HydrusNetwork:
url = f"{self.scheme}://{self.hostname}:{self.port}{path}"
# Log request details
logger.debug(f"{self._log_prefix()} {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})")
logger.debug(
f"{self._log_prefix()} {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})"
)
status = 0
reason = ""
@@ -153,7 +156,9 @@ class HydrusNetwork:
# Let httpx choose chunked transfer encoding for safety.
headers.pop("Content-Length", None)
logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")
logger.debug(
f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)"
)
# Stream upload body with a stderr progress bar (pipeline-safe).
from models import ProgressBar
@@ -173,7 +178,12 @@ class HydrusNetwork:
if not final and (now - float(last_render_t[0])) < 0.25:
return
last_render_t[0] = now
bar.update(downloaded=int(sent[0]), total=int(file_size), label=str(label), file=sys.stderr)
bar.update(
downloaded=int(sent[0]),
total=int(file_size),
label=str(label),
file=sys.stderr,
)
if final:
bar.finish()
@@ -210,7 +220,9 @@ class HydrusNetwork:
# httpx will usually set this automatically, but we set it explicitly to
# match the Hydrus API docs and avoid edge cases.
headers.setdefault("Content-Type", "application/json")
logger.debug(f"{self._log_prefix()} Request body size: {len(content) if content else 'json'}")
logger.debug(
f"{self._log_prefix()} Request body size: {len(content) if content else 'json'}"
)
response = client.request(
spec.method,
@@ -271,7 +283,9 @@ class HydrusNetwork:
# Handle expired session key (419) by clearing cache and retrying once
if status == 419 and self._session_key and "session" in message.lower():
logger.warning(f"{self._log_prefix()} Session key expired, acquiring new one and retrying...")
logger.warning(
f"{self._log_prefix()} Session key expired, acquiring new one and retrying..."
)
self._session_key = "" # Clear expired session key
try:
self._acquire_session_key()
@@ -308,7 +322,9 @@ class HydrusNetwork:
session_key = result.get("session_key")
if not session_key:
raise HydrusRequestError(500, "Session key response missing 'session_key' field", result)
raise HydrusRequestError(
500, "Session key response missing 'session_key' field", result
)
self._session_key = session_key
return session_key
@@ -341,7 +357,9 @@ class HydrusNetwork:
file_path: Path | None = None,
content_type: str | None = None,
) -> dict[str, Any]:
spec = HydrusRequestSpec("POST", endpoint, data=data, file_path=file_path, content_type=content_type)
spec = HydrusRequestSpec(
"POST", endpoint, data=data, file_path=file_path, content_type=content_type
)
return cast(dict[str, Any], self._perform_request(spec))
def _ensure_hashes(self, hash: Union[str, Iterable[str]]) -> list[str]:
@@ -356,11 +374,20 @@ class HydrusNetwork:
# Use the correct parameter name for Hydrus API compatibility
return f"{url}{separator}access_key={quote(self.access_key)}"
# ------------------------------------------------------------------
# public API wrappers
# ------------------------------------------------------------------
def add_file(self, path: Union[str, Path]) -> dict[str, Any]:
    """Add a file to Hydrus using the octet-stream upload mode.

    This mirrors the Hydrus API POST /add_files/add_file behavior when sending
    the file bytes as the POST body.

    Args:
        path: Location of the file to upload, given either as a ``Path`` or
            as a string path.

    Returns:
        The mapping returned by ``self._post`` for the upload request.

    Raises:
        FileNotFoundError: If ``path`` does not refer to an existing regular
            file (checked with ``Path.is_file``; read permission is not
            verified here).
    """
    # Accept both Path and str for convenience
    file_path = path if isinstance(path, Path) else Path(path)
    if not file_path.is_file():
        raise FileNotFoundError(f"Upload file not found: {file_path}")
    # Forward as file_path so the request body is streamed as application/octet-stream
    return self._post("/add_files/add_file", file_path=file_path)
def undelete_files(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]:
@@ -373,7 +400,9 @@ class HydrusNetwork:
body = {"hashes": hash_list}
return self._post("/add_files/undelete_files", data=body)
def delete_files(self, hashes: Union[str, Iterable[str]], *, reason: str | None = None) -> dict[str, Any]:
def delete_files(
self, hashes: Union[str, Iterable[str]], *, reason: str | None = None
) -> dict[str, Any]:
"""Delete files in Hydrus.
Hydrus Client API: POST /add_files/delete_files
@@ -396,7 +425,9 @@ class HydrusNetwork:
body = {"hashes": hash_list}
return self._post("/add_files/clear_file_deletion_record", data=body)
def add_tag(
    self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str
) -> dict[str, Any]:
    """Add tags to one or more files, addressing the tag service by name.

    Posts to ``/add_tags/add_tags`` with a ``service_names_to_tags`` mapping.

    Args:
        hash: A single file hash or an iterable of hashes; normalised through
            ``self._ensure_hashes`` before being sent.
        tags: Tags to add; materialised into a list for the request body.
        service_name: Name of the tag service that receives the tags.

    Returns:
        The mapping returned by ``self._post``.
    """
    # Use a separate local name instead of rebinding the ``hash`` parameter,
    # which already shadows the builtin.
    hashes = self._ensure_hashes(hash)
    body = {"hashes": hashes, "service_names_to_tags": {service_name: list(tags)}}
    return self._post("/add_tags/add_tags", data=body)
@@ -416,7 +447,9 @@ class HydrusNetwork:
}
return self._post("/add_tags/add_tags", data=body)
def add_tags_by_key(
    self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_key: str
) -> dict[str, Any]:
    """Add tags to one or more files, addressing the tag service by key.

    Posts to ``/add_tags/add_tags`` with a ``service_keys_to_tags`` mapping.

    Args:
        hash: A single file hash or an iterable of hashes; normalised through
            ``self._ensure_hashes`` before being sent.
        tags: Tags to add; materialised into a list for the request body.
        service_key: Key of the tag service that receives the tags.

    Returns:
        The mapping returned by ``self._post``.
    """
    # Use a separate local name instead of rebinding the ``hash`` parameter,
    # which already shadows the builtin.
    hashes = self._ensure_hashes(hash)
    body = {"hashes": hashes, "service_keys_to_tags": {service_key: list(tags)}}
    return self._post("/add_tags/add_tags", data=body)
@@ -514,7 +547,13 @@ class HydrusNetwork:
query = {"hash": file_hash}
return self._get("/manage_file_relationships/get_file_relationships", query=query)
def set_relationship(self, hash_a: str, hash_b: str, relationship: Union[str, int], do_default_content_merge: bool = False) -> dict[str, Any]:
def set_relationship(
self,
hash_a: str,
hash_b: str,
relationship: Union[str, int],
do_default_content_merge: bool = False,
) -> dict[str, Any]:
"""Set a relationship between two files in Hydrus.
This wraps Hydrus Client API: POST /manage_file_relationships/set_file_relationships.
@@ -622,7 +661,11 @@ class HydrusNetwork:
lambda v: "true" if v else "false" if v is not None else None,
),
("file_sort_type", file_sort_type, lambda v: str(v) if v is not None else None),
("file_sort_asc", file_sort_asc, lambda v: "true" if v else "false" if v is not None else None),
(
"file_sort_asc",
file_sort_asc,
lambda v: "true" if v else "false" if v is not None else None,
),
("file_sort_key", file_sort_key, lambda v: v),
]
@@ -710,90 +753,96 @@ class HydrusCliOptions:
@classmethod
def from_namespace(cls: Type[HydrusCliOptionsT], namespace: Any) -> HydrusCliOptionsT:
    """Build an options object from a parsed command-line namespace.

    Args:
        namespace: Object exposing ``accept``, ``body_file``, ``body``, ``url``,
            ``method``, ``access_key``, ``timeout`` and ``content_type``
            attributes (presumably an ``argparse.Namespace`` — confirm with
            the caller).

    Returns:
        An instance of ``cls`` populated from ``namespace``. ``accept`` falls
        back to ``application/cbor``, ``method`` is upper-cased, and ``debug``
        reflects whether the ``DOWNLOW_DEBUG`` environment variable is set to
        a non-empty value.
    """
    accept_header = namespace.accept or "application/cbor"
    body_bytes: bytes | None = None
    body_path: Path | None = None
    # A body file takes precedence over an inline body; at most one is populated.
    if namespace.body_file:
        body_path = Path(namespace.body_file)
    elif namespace.body is not None:
        body_bytes = namespace.body.encode("utf-8")
    return cls(
        url=namespace.url,
        method=namespace.method.upper(),
        access_key=namespace.access_key or "",
        accept=accept_header,
        timeout=namespace.timeout,
        content_type=namespace.content_type,
        body_bytes=body_bytes,
        body_path=body_path,
        debug=bool(os.environ.get("DOWNLOW_DEBUG")),
    )
def hydrus_request(args, parser) -> int:
if args.body and args.body_file:
parser.error('Only one of --body or --body-file may be supplied')
parser.error("Only one of --body or --body-file may be supplied")
options = HydrusCliOptions.from_namespace(args)
parsed = urlsplit(options.url)
if parsed.scheme not in ('http', 'https'):
parser.error('Only http and https url are supported')
if parsed.scheme not in ("http", "https"):
parser.error("Only http and https url are supported")
if not parsed.hostname:
parser.error('Invalid Hydrus URL')
parser.error("Invalid Hydrus URL")
headers: dict[str, str] = {}
if options.access_key:
headers['Hydrus-Client-API-Access-Key'] = options.access_key
headers["Hydrus-Client-API-Access-Key"] = options.access_key
if options.accept:
headers['Accept'] = options.accept
headers["Accept"] = options.accept
request_body_bytes: bytes | None = None
body_path: Path | None = None
if options.body_path is not None:
body_path = options.body_path
if not body_path.is_file():
parser.error(f'File not found: {body_path}')
headers.setdefault('Content-Type', options.content_type or 'application/octet-stream')
headers['Content-Length'] = str(body_path.stat().st_size)
parser.error(f"File not found: {body_path}")
headers.setdefault("Content-Type", options.content_type or "application/octet-stream")
headers["Content-Length"] = str(body_path.stat().st_size)
elif options.body_bytes is not None:
request_body_bytes = options.body_bytes
headers['Content-Type'] = options.content_type or 'application/json'
headers["Content-Type"] = options.content_type or "application/json"
assert request_body_bytes is not None
headers['Content-Length'] = str(len(request_body_bytes))
headers["Content-Length"] = str(len(request_body_bytes))
elif options.content_type:
headers['Content-Type'] = options.content_type
headers["Content-Type"] = options.content_type
if parsed.username or parsed.password:
userinfo = f"{parsed.username or ''}:{parsed.password or ''}".encode('utf-8')
headers['Authorization'] = 'Basic ' + base64.b64encode(userinfo).decode('ascii')
userinfo = f"{parsed.username or ''}:{parsed.password or ''}".encode("utf-8")
headers["Authorization"] = "Basic " + base64.b64encode(userinfo).decode("ascii")
path = parsed.path or '/'
path = parsed.path or "/"
if parsed.query:
path += '?' + parsed.query
path += "?" + parsed.query
port = parsed.port
if port is None:
port = 443 if parsed.scheme == 'https' else 80
port = 443 if parsed.scheme == "https" else 80
connection_cls = http.client.HTTPSConnection if parsed.scheme == 'https' else http.client.HTTPConnection
host = parsed.hostname or 'localhost'
connection_cls = (
http.client.HTTPSConnection if parsed.scheme == "https" else http.client.HTTPConnection
)
host = parsed.hostname or "localhost"
connection = connection_cls(host, port, timeout=options.timeout)
if options.debug:
log(f"Hydrus connecting to {parsed.scheme}://{host}:{port}{path}", file=sys.stderr)
response_bytes: bytes = b''
content_type = ''
response_bytes: bytes = b""
content_type = ""
status = 0
try:
if body_path is not None:
with body_path.open('rb') as handle:
with body_path.open("rb") as handle:
if options.debug:
size_hint = headers.get('Content-Length', 'unknown')
size_hint = headers.get("Content-Length", "unknown")
log(f"Hydrus sending file body ({size_hint} bytes)", file=sys.stderr)
connection.putrequest(options.method, path)
host_header = host
if (parsed.scheme == 'http' and port not in (80, None)) or (parsed.scheme == 'https' and port not in (443, None)):
if (parsed.scheme == "http" and port not in (80, None)) or (
parsed.scheme == "https" and port not in (443, None)
):
host_header = f"{host}:{port}"
connection.putheader('Host', host_header)
connection.putheader("Host", host_header)
for key, value in headers.items():
if value:
connection.putheader(key, value)
@@ -804,27 +853,29 @@ def hydrus_request(args, parser) -> int:
break
connection.send(chunk)
if options.debug:
log('[downlow.py] Hydrus upload complete; awaiting response', file=sys.stderr)
log("[downlow.py] Hydrus upload complete; awaiting response", file=sys.stderr)
else:
if options.debug:
size_hint = 'none' if request_body_bytes is None else str(len(request_body_bytes))
size_hint = "none" if request_body_bytes is None else str(len(request_body_bytes))
log(f"Hydrus sending request body bytes={size_hint}", file=sys.stderr)
sanitized_headers = {k: v for k, v in headers.items() if v}
connection.request(options.method, path, body=request_body_bytes, headers=sanitized_headers)
connection.request(
options.method, path, body=request_body_bytes, headers=sanitized_headers
)
response = connection.getresponse()
status = response.status
response_bytes = response.read()
if options.debug:
log(f"Hydrus response received ({len(response_bytes)} bytes)", file=sys.stderr)
content_type = response.getheader('Content-Type', '')
content_type = response.getheader("Content-Type", "")
except (OSError, http.client.HTTPException) as exc:
log(f"HTTP error: {exc}", file=sys.stderr)
return 1
finally:
connection.close()
content_type_lower = (content_type or '').split(';', 1)[0].strip().lower()
accept_value = options.accept or ''
expect_cbor = 'cbor' in (content_type_lower or '') or 'cbor' in accept_value.lower()
content_type_lower = (content_type or "").split(";", 1)[0].strip().lower()
accept_value = options.accept or ""
expect_cbor = "cbor" in (content_type_lower or "") or "cbor" in accept_value.lower()
payload = None
decode_error: Exception | None = None
if response_bytes:
@@ -835,9 +886,9 @@ def hydrus_request(args, parser) -> int:
decode_error = exc
if payload is None and not expect_cbor:
try:
payload = json.loads(response_bytes.decode('utf-8'))
payload = json.loads(response_bytes.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError):
payload = response_bytes.decode('utf-8', 'replace')
payload = response_bytes.decode("utf-8", "replace")
elif payload is None and expect_cbor and decode_error is not None:
log(f"Expected CBOR response but decoding failed: {decode_error}", file=sys.stderr)
return 1
@@ -847,10 +898,12 @@ def hydrus_request(args, parser) -> int:
if isinstance(json_ready, (dict, list)):
log(json.dumps(json_ready, ensure_ascii=False))
elif json_ready is None:
log('{}')
log("{}")
else:
log(json.dumps({'value': json_ready}, ensure_ascii=False))
log(json.dumps({"value": json_ready}, ensure_ascii=False))
return 0 if 200 <= status < 400 else 1
def hydrus_export(args, _parser) -> int:
from metadata import apply_mutagen_metadata, build_ffmpeg_command, prepare_ffmpeg_metadata
@@ -858,7 +911,7 @@ def hydrus_export(args, _parser) -> int:
original_suffix = output_path.suffix
target_dir = output_path.parent
metadata_payload: Optional[dict[str, Any]] = None
metadata_raw = getattr(args, 'metadata_json', None)
metadata_raw = getattr(args, "metadata_json", None)
if metadata_raw:
try:
parsed = json.loads(metadata_raw)
@@ -868,7 +921,7 @@ def hydrus_export(args, _parser) -> int:
if isinstance(parsed, dict):
metadata_payload = parsed
else:
log('[downlow.py] Metadata JSON must decode to an object', file=sys.stderr)
log("[downlow.py] Metadata JSON must decode to an object", file=sys.stderr)
return 1
ffmpeg_metadata = prepare_ffmpeg_metadata(metadata_payload)
@@ -878,8 +931,8 @@ def hydrus_export(args, _parser) -> int:
cleaned = value.strip()
if not cleaned:
return None
if not cleaned.startswith('.'): # tolerate inputs like "mp4"
cleaned = '.' + cleaned.lstrip('.')
if not cleaned.startswith("."): # tolerate inputs like "mp4"
cleaned = "." + cleaned.lstrip(".")
return cleaned
def _extension_from_mime(mime: Optional[str]) -> Optional[str]:
@@ -887,84 +940,78 @@ def hydrus_export(args, _parser) -> int:
return None
mime_map = {
# Images / bitmaps
'image/jpeg': '.jpg',
'image/jpg': '.jpg',
'image/png': '.png',
'image/gif': '.gif',
'image/webp': '.webp',
'image/avif': '.avif',
'image/jxl': '.jxl', # JPEG XL
'image/bmp': '.bmp',
'image/heic': '.heic',
'image/heif': '.heif',
'image/x-icon': '.ico',
'image/vnd.microsoft.icon': '.ico',
'image/qoi': '.qoi', # Quite OK Image
'image/tiff': '.tiff',
'image/svg+xml': '.svg',
'image/vnd.adobe.photoshop': '.psd',
"image/jpeg": ".jpg",
"image/jpg": ".jpg",
"image/png": ".png",
"image/gif": ".gif",
"image/webp": ".webp",
"image/avif": ".avif",
"image/jxl": ".jxl", # JPEG XL
"image/bmp": ".bmp",
"image/heic": ".heic",
"image/heif": ".heif",
"image/x-icon": ".ico",
"image/vnd.microsoft.icon": ".ico",
"image/qoi": ".qoi", # Quite OK Image
"image/tiff": ".tiff",
"image/svg+xml": ".svg",
"image/vnd.adobe.photoshop": ".psd",
# Animation / sequence variants
'image/apng': '.apng',
'image/avif-sequence': '.avifs',
'image/heic-sequence': '.heics',
'image/heif-sequence': '.heifs',
"image/apng": ".apng",
"image/avif-sequence": ".avifs",
"image/heic-sequence": ".heics",
"image/heif-sequence": ".heifs",
# Video
'video/mp4': '.mp4',
'video/webm': '.webm',
'video/quicktime': '.mov',
'video/ogg': '.ogv',
'video/mpeg': '.mpeg',
'video/x-msvideo': '.avi',
'video/x-flv': '.flv',
'video/x-matroska': '.mkv',
'video/x-ms-wmv': '.wmv',
'video/vnd.rn-realvideo': '.rv',
"video/mp4": ".mp4",
"video/webm": ".webm",
"video/quicktime": ".mov",
"video/ogg": ".ogv",
"video/mpeg": ".mpeg",
"video/x-msvideo": ".avi",
"video/x-flv": ".flv",
"video/x-matroska": ".mkv",
"video/x-ms-wmv": ".wmv",
"video/vnd.rn-realvideo": ".rv",
# Audio
'audio/mpeg': '.mp3',
'audio/mp4': '.m4a',
'audio/ogg': '.ogg',
'audio/flac': '.flac',
'audio/wav': '.wav',
'audio/x-wav': '.wav',
'audio/x-ms-wma': '.wma',
'audio/x-tta': '.tta',
'audio/vnd.wave': '.wav',
'audio/x-wavpack': '.wv',
"audio/mpeg": ".mp3",
"audio/mp4": ".m4a",
"audio/ogg": ".ogg",
"audio/flac": ".flac",
"audio/wav": ".wav",
"audio/x-wav": ".wav",
"audio/x-ms-wma": ".wma",
"audio/x-tta": ".tta",
"audio/vnd.wave": ".wav",
"audio/x-wavpack": ".wv",
# Documents / office
'application/pdf': '.pdf',
'application/epub+zip': '.epub',
'application/vnd.djvu': '.djvu',
'application/rtf': '.rtf',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
'application/msword': '.doc',
'application/vnd.ms-excel': '.xls',
'application/vnd.ms-powerpoint': '.ppt',
"application/pdf": ".pdf",
"application/epub+zip": ".epub",
"application/vnd.djvu": ".djvu",
"application/rtf": ".rtf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
"application/msword": ".doc",
"application/vnd.ms-excel": ".xls",
"application/vnd.ms-powerpoint": ".ppt",
# Archive / comicbook / zip-like
'application/zip': '.zip',
'application/x-7z-compressed': '.7z',
'application/x-rar-compressed': '.rar',
'application/gzip': '.gz',
'application/x-tar': '.tar',
'application/x-cbz': '.cbz', # often just ZIP with images; CBZ is not an official mime type but used as mapping
"application/zip": ".zip",
"application/x-7z-compressed": ".7z",
"application/x-rar-compressed": ".rar",
"application/gzip": ".gz",
"application/x-tar": ".tar",
"application/x-cbz": ".cbz", # often just ZIP with images; CBZ is not an official mime type but used as mapping
# App / project / other
'application/clip': '.clip', # Clip Studio
'application/x-krita': '.kra',
'application/x-procreate': '.procreate',
'application/x-shockwave-flash': '.swf',
"application/clip": ".clip", # Clip Studio
"application/x-krita": ".kra",
"application/x-procreate": ".procreate",
"application/x-shockwave-flash": ".swf",
}
return mime_map.get(mime.lower())
def _extract_hash(file_url: str) -> Optional[str]:
match = re.search(r'[?&]hash=([0-9a-fA-F]+)', file_url)
match = re.search(r"[?&]hash=([0-9a-fA-F]+)", file_url)
return match.group(1) if match else None
# Ensure output and temp directories exist using global helper
@@ -975,57 +1022,62 @@ def hydrus_export(args, _parser) -> int:
log(f"{exc}", file=sys.stderr)
return 1
source_suffix = _normalise_ext(getattr(args, 'source_ext', None))
if source_suffix and source_suffix.lower() == '.bin':
source_suffix = _normalise_ext(getattr(args, "source_ext", None))
if source_suffix and source_suffix.lower() == ".bin":
source_suffix = None
if source_suffix is None:
hydrus_url = getattr(args, 'hydrus_url', None)
hydrus_url = getattr(args, "hydrus_url", None)
if not hydrus_url:
try:
from config import load_config, get_hydrus_url
hydrus_url = get_hydrus_url(load_config())
except Exception as exc:
hydrus_url = None
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"hydrus-export could not load Hydrus URL: {exc}", file=sys.stderr)
if hydrus_url:
try:
setattr(args, 'hydrus_url', hydrus_url)
setattr(args, "hydrus_url", hydrus_url)
except Exception:
pass
resolved_suffix: Optional[str] = None
file_hash = getattr(args, 'file_hash', None) or _extract_hash(args.file_url)
file_hash = getattr(args, "file_hash", None) or _extract_hash(args.file_url)
if hydrus_url and file_hash:
try:
client = HydrusNetwork(url=hydrus_url, access_key=args.access_key, timeout=args.timeout)
client = HydrusNetwork(
url=hydrus_url, access_key=args.access_key, timeout=args.timeout
)
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get('metadata') if isinstance(meta_response, dict) else None
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
ext_value = _normalise_ext(entry.get('ext') if isinstance(entry, dict) else None)
ext_value = _normalise_ext(
entry.get("ext") if isinstance(entry, dict) else None
)
if ext_value:
resolved_suffix = ext_value
else:
mime_value = entry.get('mime') if isinstance(entry, dict) else None
mime_value = entry.get("mime") if isinstance(entry, dict) else None
resolved_suffix = _extension_from_mime(mime_value)
except Exception as exc: # pragma: no cover - defensive
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"hydrus metadata fetch failed: {exc}", file=sys.stderr)
if not resolved_suffix:
fallback_suffix = _normalise_ext(original_suffix)
if fallback_suffix and fallback_suffix.lower() == '.bin':
if fallback_suffix and fallback_suffix.lower() == ".bin":
fallback_suffix = None
resolved_suffix = fallback_suffix or '.hydrus'
resolved_suffix = fallback_suffix or ".hydrus"
source_suffix = resolved_suffix
suffix = source_suffix or '.hydrus'
if suffix and output_path.suffix.lower() in {'', '.bin'}:
suffix = source_suffix or ".hydrus"
if suffix and output_path.suffix.lower() in {"", ".bin"}:
if output_path.suffix.lower() != suffix.lower():
output_path = output_path.with_suffix(suffix)
target_dir = output_path.parent
# Determine temp directory (prefer provided tmp_dir, fallback to output location)
temp_dir = Path(getattr(args, 'tmp_dir', None) or target_dir)
temp_dir = Path(getattr(args, "tmp_dir", None) or target_dir)
try:
ensure_directory(temp_dir)
except RuntimeError:
@@ -1035,11 +1087,11 @@ def hydrus_export(args, _parser) -> int:
temp_file.close()
downloaded_bytes = 0
headers = {
'Hydrus-Client-API-Access-Key': args.access_key,
"Hydrus-Client-API-Access-Key": args.access_key,
}
try:
downloaded_bytes = download_hydrus_file(args.file_url, headers, temp_path, args.timeout)
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"hydrus-export downloaded {downloaded_bytes} bytes", file=sys.stderr)
except httpx.RequestError as exc:
if temp_path.exists():
@@ -1055,19 +1107,26 @@ def hydrus_export(args, _parser) -> int:
converted_tmp: Optional[Path] = None
try:
final_target = unique_path(output_path)
if args.format == 'copy':
if args.format == "copy":
shutil.move(str(temp_path), str(final_target))
result_path = final_target
else:
ffmpeg_path = shutil.which('ffmpeg')
ffmpeg_path = shutil.which("ffmpeg")
if not ffmpeg_path:
raise RuntimeError('ffmpeg executable not found in PATH')
converted_tmp = final_target.with_suffix(final_target.suffix + '.part')
raise RuntimeError("ffmpeg executable not found in PATH")
converted_tmp = final_target.with_suffix(final_target.suffix + ".part")
if converted_tmp.exists():
converted_tmp.unlink()
max_width = args.max_width if args.max_width and args.max_width > 0 else 0
cmd = build_ffmpeg_command(ffmpeg_path, temp_path, converted_tmp, args.format, max_width, metadata=ffmpeg_metadata if ffmpeg_metadata else None)
if os.environ.get('DOWNLOW_DEBUG'):
cmd = build_ffmpeg_command(
ffmpeg_path,
temp_path,
converted_tmp,
args.format,
max_width,
metadata=ffmpeg_metadata if ffmpeg_metadata else None,
)
if os.environ.get("DOWNLOW_DEBUG"):
log(f"ffmpeg command: {' '.join(cmd)}", file=sys.stderr)
completed = subprocess.run(
cmd,
@@ -1076,24 +1135,24 @@ def hydrus_export(args, _parser) -> int:
check=False,
text=True,
)
ffmpeg_log = (completed.stderr or '').strip()
ffmpeg_log = (completed.stderr or "").strip()
if completed.returncode != 0:
error_details = ffmpeg_log or (completed.stdout or '').strip()
error_details = ffmpeg_log or (completed.stdout or "").strip()
raise RuntimeError(
f'ffmpeg failed with exit code {completed.returncode}'
+ (f': {error_details}' if error_details else '')
f"ffmpeg failed with exit code {completed.returncode}"
+ (f": {error_details}" if error_details else "")
)
shutil.move(str(converted_tmp), str(final_target))
result_path = final_target
apply_mutagen_metadata(result_path, ffmpeg_metadata, args.format)
result_size = result_path.stat().st_size if result_path.exists() else None
payload: dict[str, object] = {'output': str(result_path)}
payload: dict[str, object] = {"output": str(result_path)}
if downloaded_bytes:
payload['source_bytes'] = downloaded_bytes
payload["source_bytes"] = downloaded_bytes
if result_size is not None:
payload['size_bytes'] = result_size
payload["size_bytes"] = result_size
if metadata_payload:
payload['metadata_keys'] = sorted(ffmpeg_metadata.keys()) if ffmpeg_metadata else []
payload["metadata_keys"] = sorted(ffmpeg_metadata.keys()) if ffmpeg_metadata else []
log(json.dumps(payload, ensure_ascii=False))
if ffmpeg_log:
log(ffmpeg_log, file=sys.stderr)
@@ -1125,87 +1184,87 @@ def hydrus_export(args, _parser) -> int:
# Source: https://hydrusnetwork.github.io/hydrus/filetypes.html
# Mapping of category -> {file extension -> MIME type}.
# Extensions are lower-case and include the leading dot.
SUPPORTED_FILETYPES = {
    # Images
    "image": {
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".avif": "image/avif",
        ".jxl": "image/jxl",
        ".bmp": "image/bmp",
        ".heic": "image/heic",
        ".heif": "image/heif",
        ".ico": "image/x-icon",
        ".qoi": "image/qoi",
        ".tiff": "image/tiff",
    },
    # Animated Images
    "animation": {
        ".apng": "image/apng",
        ".avifs": "image/avif-sequence",
        ".heics": "image/heic-sequence",
        ".heifs": "image/heif-sequence",
    },
    # Video
    "video": {
        ".mp4": "video/mp4",
        ".webm": "video/webm",
        ".mkv": "video/x-matroska",
        ".avi": "video/x-msvideo",
        ".flv": "video/x-flv",
        ".mov": "video/quicktime",
        ".mpeg": "video/mpeg",
        ".ogv": "video/ogg",
        ".rm": "video/vnd.rn-realvideo",
        ".wmv": "video/x-ms-wmv",
    },
    # Audio
    "audio": {
        ".mp3": "audio/mp3",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".m4a": "audio/mp4",
        ".mka": "audio/x-matroska",
        ".mkv": "audio/x-matroska",
        ".mp4": "audio/mp4",
        ".ra": "audio/vnd.rn-realaudio",
        ".tta": "audio/x-tta",
        ".wav": "audio/x-wav",
        ".wv": "audio/wavpack",
        ".wma": "audio/x-ms-wma",
    },
    # Applications & Documents
    "application": {
        ".swf": "application/x-shockwave-flash",
        ".pdf": "application/pdf",
        ".epub": "application/epub+zip",
        ".djvu": "image/vnd.djvu",
        ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ".doc": "application/msword",
        ".xls": "application/vnd.ms-excel",
        ".ppt": "application/vnd.ms-powerpoint",
        ".rtf": "application/rtf",
    },
    # Image Project Files
    # NOTE(review): the .clip, .procreate and .sai2 values previously ended in
    # a stray "1" (e.g. "application/clip1"), which looks like a footnote
    # marker copied from the upstream filetypes table. It is dropped here —
    # confirm against the Hydrus documentation.
    "project": {
        ".clip": "application/clip",
        ".kra": "application/x-krita",
        ".procreate": "application/x-procreate",
        ".psd": "image/vnd.adobe.photoshop",
        ".sai2": "application/sai2",
        ".svg": "image/svg+xml",
        ".xcf": "application/x-xcf",
    },
    # Archives
    "archive": {
        ".cbz": "application/vnd.comicbook+zip",
        ".7z": "application/x-7z-compressed",
        ".gz": "application/gzip",
        ".rar": "application/vnd.rar",
        ".zip": "application/zip",
    },
}
@@ -1279,8 +1338,8 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
from urllib.parse import urlparse
parsed = urlparse(url)
hostname = parsed.hostname or 'localhost'
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
hostname = parsed.hostname or "localhost"
port = parsed.port or (443 if parsed.scheme == "https" else 80)
# Try to connect to the host/port
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -1345,7 +1404,9 @@ def get_client(config: dict[str, Any]) -> HydrusNetwork:
# Use new config helpers
hydrus_url = (get_hydrus_url(config, "home") or "").strip()
if not hydrus_url:
raise RuntimeError("Hydrus URL is not configured (check config.conf store.hydrusnetwork.home.URL)")
raise RuntimeError(
"Hydrus URL is not configured (check config.conf store.hydrusnetwork.home.URL)"
)
access_key = get_hydrus_access_key(config, "home") or ""
timeout_raw = config.get("HydrusNetwork_Request_Timeout")
@@ -1435,7 +1496,10 @@ def is_request_error(exc: Exception) -> bool:
CHUNK_SIZE = 1024 * 1024 # 1 MiB
def download_hydrus_file(file_url: str, headers: dict[str, str], destination: Path, timeout: float) -> int:
def download_hydrus_file(
file_url: str, headers: dict[str, str], destination: Path, timeout: float
) -> int:
"""Download *file_url* into *destination* returning the byte count with progress bar."""
from SYS.progress import print_progress, print_final_progress
@@ -1451,13 +1515,13 @@ def download_hydrus_file(file_url: str, headers: dict[str, str], destination: Pa
# Try to get size from content-length header
try:
file_size = int(response.headers.get('content-length', 0))
file_size = int(response.headers.get("content-length", 0))
except (ValueError, TypeError):
file_size = None
filename = destination.name
with destination.open('wb') as handle:
with destination.open("wb") as handle:
for chunk in response.iter_bytes(CHUNK_SIZE):
if not chunk:
break
+85 -70
View File
@@ -21,6 +21,7 @@ logger = logging.getLogger(__name__)
class AllDebridError(Exception):
"""Raised when AllDebrid API request fails."""
pass
@@ -38,10 +39,10 @@ def _ping_alldebrid(base_url: str) -> Tuple[bool, Optional[str]]:
"""Ping the AllDebrid API base URL (no API key required)."""
try:
url = str(base_url or "").rstrip("/") + "/ping"
with HTTPClient(timeout=10.0, headers={'User-Agent': 'downlow/1.0'}) as client:
with HTTPClient(timeout=10.0, headers={"User-Agent": "downlow/1.0"}) as client:
response = client.get(url)
data = json.loads(response.content.decode('utf-8'))
if data.get('status') == 'success' and data.get('data', {}).get('ping') == 'pong':
data = json.loads(response.content.decode("utf-8"))
if data.get("status") == "success" and data.get("data", {}).get("ping") == "pong":
return True, None
return False, "Invalid API response"
except Exception as exc:
@@ -155,21 +156,23 @@ class AllDebridClient:
response.raise_for_status()
except Exception as req_err:
# Log detailed error info
logger.error(f"[AllDebrid] Request error to {endpoint}: {req_err}", exc_info=True)
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
logger.error(
f"[AllDebrid] Request error to {endpoint}: {req_err}", exc_info=True
)
if hasattr(req_err, "response") and req_err.response is not None: # type: ignore
try:
error_body = req_err.response.content.decode('utf-8') # type: ignore
error_body = req_err.response.content.decode("utf-8") # type: ignore
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
except:
pass
raise
data = json.loads(response.content.decode('utf-8'))
data = json.loads(response.content.decode("utf-8"))
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
# Check for API errors
if data.get('status') == 'error':
error_msg = data.get('error', {}).get('message', 'Unknown error')
if data.get("status") == "error":
error_msg = data.get("error", {}).get("message", "Unknown error")
logger.error(f"[AllDebrid] API error: {error_msg}")
raise AllDebridError(f"AllDebrid API error: {error_msg}")
@@ -193,23 +196,23 @@ class AllDebridClient:
Raises:
AllDebridError: If unlock fails
"""
if not link.startswith(('http://', 'https://')):
if not link.startswith(("http://", "https://")):
raise AllDebridError(f"Invalid URL: {link}")
try:
response = self._request('link/unlock', {'link': link})
response = self._request("link/unlock", {"link": link})
# Check if unlock was successful
if response.get('status') == 'success':
data = response.get('data', {})
if response.get("status") == "success":
data = response.get("data", {})
# AllDebrid returns the download info in 'link' field
if 'link' in data:
return data['link']
if "link" in data:
return data["link"]
# Alternative: check for 'file' field
if 'file' in data:
return data['file']
if "file" in data:
return data["file"]
# If no direct link, return the input link
return link
@@ -268,10 +271,10 @@ class AllDebridClient:
"""
try:
# v4 endpoint is `/user`
response = self._request('user')
response = self._request("user")
if response.get('status') == 'success':
return response.get('data', {})
if response.get("status") == "success":
return response.get("data", {})
return {}
except AllDebridError:
@@ -290,10 +293,10 @@ class AllDebridClient:
AllDebridError: If request fails
"""
try:
response = self._request('hosts/domains')
response = self._request("hosts/domains")
if response.get('status') == 'success':
data = response.get('data', {})
if response.get("status") == "success":
data = response.get("data", {})
return data if isinstance(data, dict) else {}
return {}
@@ -328,19 +331,19 @@ class AllDebridClient:
try:
# API endpoint: POST /v4/magnet/upload
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
response = self._request("magnet/upload", {"magnets[]": magnet_uri})
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
if response.get("status") == "success":
data = response.get("data", {})
magnets = data.get("magnets", [])
if magnets and len(magnets) > 0:
magnet_info = magnets[0]
# Check for errors in the magnet response
if 'error' in magnet_info:
error = magnet_info['error']
error_msg = error.get('message', 'Unknown error')
if "error" in magnet_info:
error = magnet_info["error"]
error_msg = error.get("message", "Unknown error")
raise AllDebridError(f"Magnet error: {error_msg}")
return magnet_info
@@ -393,13 +396,13 @@ class AllDebridClient:
self.base_url = self.BASE_URL_V41
try:
response = self._request('magnet/status', {'id': str(magnet_id)})
response = self._request("magnet/status", {"id": str(magnet_id)})
finally:
self.base_url = old_base
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', {})
if response.get("status") == "success":
data = response.get("data", {})
magnets = data.get("magnets", {})
# Handle both list and dict responses
if isinstance(magnets, list) and len(magnets) > 0:
@@ -429,15 +432,15 @@ class AllDebridClient:
old_base = self.base_url
self.base_url = self.BASE_URL_V41
try:
response = self._request('magnet/status')
response = self._request("magnet/status")
finally:
self.base_url = old_base
if response.get('status') != 'success':
if response.get("status") != "success":
return []
data = response.get('data', {})
magnets = data.get('magnets', [])
data = response.get("data", {})
magnets = data.get("magnets", [])
if isinstance(magnets, list):
return [m for m in magnets if isinstance(m, dict)]
@@ -445,7 +448,7 @@ class AllDebridClient:
# Some API variants may return a dict.
if isinstance(magnets, dict):
# If it's a single magnet dict, wrap it; if it's an id->magnet mapping, return values.
if 'id' in magnets:
if "id" in magnets:
return [magnets]
return [m for m in magnets.values() if isinstance(m, dict)]
@@ -455,7 +458,9 @@ class AllDebridClient:
except Exception as exc:
raise AllDebridError(f"Failed to list magnets: {exc}")
def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
def magnet_status_live(
self, magnet_id: int, session: Optional[int] = None, counter: int = 0
) -> Dict[str, Any]:
"""Get live status of a magnet using delta sync mode.
The live mode endpoint provides real-time progress by only sending
@@ -492,13 +497,13 @@ class AllDebridClient:
if session is not None:
payload["session"] = str(int(session))
payload["counter"] = str(int(counter))
response = self._request('magnet/status', payload)
response = self._request("magnet/status", payload)
finally:
self.base_url = old_base
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
if response.get("status") == "success":
data = response.get("data", {})
magnets = data.get("magnets", [])
# For specific magnet id, return the first match from the array.
if isinstance(magnets, list) and len(magnets) > 0:
@@ -542,18 +547,18 @@ class AllDebridClient:
# Build parameter: id[]=123&id[]=456 style
params = {}
for i, magnet_id in enumerate(magnet_ids):
params[f'id[{i}]'] = str(magnet_id)
params[f"id[{i}]"] = str(magnet_id)
response = self._request('magnet/files', params)
response = self._request("magnet/files", params)
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
if response.get("status") == "success":
data = response.get("data", {})
magnets = data.get("magnets", [])
# Convert list to dict keyed by ID (as string) for easier access
result = {}
for magnet_info in magnets:
magnet_id = magnet_info.get('id')
magnet_id = magnet_info.get("id")
if magnet_id:
result[str(magnet_id)] = magnet_info
@@ -584,10 +589,11 @@ class AllDebridClient:
"""
try:
# Parse magnet hash if needed
if magnet_hash.startswith('magnet:'):
if magnet_hash.startswith("magnet:"):
# Extract hash from magnet URI
import re
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
match = re.search(r"xt=urn:btih:([a-fA-F0-9]+)", magnet_hash)
if not match:
return None
hash_value = match.group(1)
@@ -597,12 +603,12 @@ class AllDebridClient:
if not hash_value or len(hash_value) < 32:
return None
response = self._request('magnet/instant', {'magnet': hash_value})
response = self._request("magnet/instant", {"magnet": hash_value})
if response.get('status') == 'success':
data = response.get('data', {})
if response.get("status") == "success":
data = response.get("data", {})
# Returns 'files' array if available, or empty
return data.get('files', [])
return data.get("files", [])
# Not available is not an error, just return empty list
return []
@@ -629,9 +635,9 @@ class AllDebridClient:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
response = self._request('magnet/delete', {'id': str(magnet_id)})
response = self._request("magnet/delete", {"id": str(magnet_id)})
if response.get('status') == 'success':
if response.get("status") == "success":
return True
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
@@ -672,16 +678,16 @@ def _get_cached_supported_hosters(api_key: str) -> Set[str]:
all_domains: Set[str] = set()
# Add hosts
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
all_domains.update(hosters_dict['hosts'])
if "hosts" in hosters_dict and isinstance(hosters_dict["hosts"], list):
all_domains.update(hosters_dict["hosts"])
# Add streams
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
all_domains.update(hosters_dict['streams'])
if "streams" in hosters_dict and isinstance(hosters_dict["streams"], list):
all_domains.update(hosters_dict["streams"])
# Add redirectors
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
all_domains.update(hosters_dict['redirectors'])
if "redirectors" in hosters_dict and isinstance(hosters_dict["redirectors"], list):
all_domains.update(hosters_dict["redirectors"])
# Cache as dict for consistency
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
@@ -723,7 +729,7 @@ def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
domain = parsed.netloc.lower()
# Remove www. prefix for comparison
if domain.startswith('www.'):
if domain.startswith("www."):
domain = domain[4:]
# Get supported hosters (cached)
@@ -733,9 +739,15 @@ def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
# API check failed, fall back to manual detection
# Check for common restricted hosters
common_hosters = {
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
'datafile.com', 'rapidfile.io', 'nitroflare.com',
'1fichier.com', 'mega.nz', 'mediafire.com'
"uploadhaven.com",
"uploaded.to",
"uploaded.net",
"datafile.com",
"rapidfile.io",
"nitroflare.com",
"1fichier.com",
"mega.nz",
"mediafire.com",
}
return any(host in url.lower() for host in common_hosters)
@@ -790,7 +802,7 @@ def is_magnet_link(uri: str) -> bool:
"""
if not uri:
return False
return uri.lower().startswith('magnet:')
return uri.lower().startswith("magnet:")
def is_torrent_hash(text: str) -> bool:
@@ -834,7 +846,7 @@ def is_torrent_file(path: str) -> bool:
"""
if not path:
return False
return path.lower().endswith('.torrent')
return path.lower().endswith(".torrent")
def parse_magnet_or_hash(uri: str) -> Optional[str]:
@@ -871,6 +883,7 @@ def parse_magnet_or_hash(uri: str) -> Optional[str]:
# Cmdlet: unlock_link
# ============================================================================
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Unlock a restricted link using AllDebrid.
@@ -891,6 +904,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any])
Returns:
0 on success, 1 on failure
"""
def _extract_link_from_args_or_result(result_obj: Any, argv: Sequence[str]) -> Optional[str]:
# Prefer an explicit URL in args.
for a in argv or []:
@@ -974,6 +988,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any])
# Cmdlet Registration
# ============================================================================
def _register_unlock_link():
"""Register unlock-link command with cmdlet registry if available."""
try:
+572 -277
View File
File diff suppressed because it is too large Load Diff
+490 -140
View File
File diff suppressed because it is too large Load Diff
+92
View File
@@ -0,0 +1,92 @@
# get-url Command Enhancement Summary
## What Changed
Enhanced the `get-url` command in [cmdlet/get_url.py](cmdlet/get_url.py) to support searching for URLs across all stores with smart pattern matching.
## Key Features Added
### 1. URL Normalization (`_normalize_url_for_search`)
- Strips protocol prefixes: `https://`, `http://`, `ftp://`, etc.
- Removes `www.` prefix (case-insensitive)
- Converts to lowercase for case-insensitive matching
**Examples:**
- `https://www.youtube.com/watch?v=xx` → `youtube.com/watch?v=xx`
- `http://www.google.com` → `google.com`
- `FTP://cdn.example.com` → `cdn.example.com`
### 2. Wildcard Pattern Matching (`_match_url_pattern`)
- Supports `*` (matches any sequence) and `?` (matches single character)
- Case-insensitive matching
- Uses Python's `fnmatch` for robust pattern support
**Examples:**
- `youtube.com*` matches `youtube.com/watch`, `youtube.com/shorts`, etc.
- `*.example.com*` matches `cdn.example.com`, `api.example.com`, etc.
- `google.com/search*` matches `google.com/search?q=term`, etc.
### 3. Cross-Store URL Search (`_search_urls_across_stores`)
- Searches all configured stores (hydrus, folder, etc.)
- Finds matching URLs across all files in all stores
- Returns results grouped by store
- Emits `UrlItem` objects for pipelining
## Command Usage
### Search for URLs matching a pattern
```bash
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
```
### Original usage (unchanged)
```bash
@1 | get-url
# Requires hash and store from piped result
```
## Implementation Details
### New Methods
- `_normalize_url_for_search(url)` - Static method to normalize URLs
- `_match_url_pattern(url, pattern)` - Static method to match with wildcards
- `_search_urls_across_stores(pattern, config)` - Search across all stores
### Modified Method
- `run()` - Enhanced to support the `-url` flag for searching; falls back to the original hash+store lookup when the flag is not provided
### Return Values
- **Search mode**: List of `UrlItem` objects grouped by store, exit code 0 if found, 1 if no matches
- **Original mode**: URLs for specific file, exit code 0 if found, 1 if not found
## Testing
A test script is included: [test_get_url_search.py](test_get_url_search.py)
**All tests pass:**
- ✓ URL normalization (protocol/www stripping)
- ✓ Wildcard pattern matching
- ✓ Case-insensitive matching
- ✓ Complex patterns with subdomains and paths
## Files Modified
- [cmdlet/get_url.py](cmdlet/get_url.py) - Enhanced with URL search functionality
- [docs/GET_URL_SEARCH.md](docs/GET_URL_SEARCH.md) - User documentation
- [test_get_url_search.py](test_get_url_search.py) - Test suite
## Backward Compatibility
✓ Fully backward compatible - original usage unchanged:
- `@1 | get-url` still works as before
- `-query` flag still works for hash lookups
- `-store` flag still required for direct lookups
## Error Handling
- Returns exit code 1 if no matches found (search mode)
- Returns exit code 1 if no store configured
- Gracefully handles store backend errors
- Logs errors to stderr without crashing
+195
View File
@@ -0,0 +1,195 @@
✅ IMPLEMENTATION COMPLETE: get-url URL Search Enhancement
═══════════════════════════════════════════════════════════════════════════════
WHAT WAS IMPLEMENTED
────────────────────────────────────────────────────────────────────────────────
Enhanced the `get-url` command to search for URLs across all stores with:
1. PROTOCOL STRIPPING
- Removes: https://, http://, ftp://, and other scheme prefixes
- Removes: www. prefix (case-insensitive)
- Example: https://www.youtube.com/watch?v=abc → youtube.com/watch?v=abc
2. WILDCARD PATTERN MATCHING
- Asterisk (*): matches any sequence of characters
- Question mark (?): matches exactly one character
- Case-insensitive matching
- Example: youtube.com* matches all YouTube URLs
3. CROSS-STORE SEARCHING
- Searches all configured stores (Hydrus, Folder, etc.)
- Finds matching URLs for all files in all stores
- Returns results grouped by store
- Emits UrlItem objects for pipelining
═══════════════════════════════════════════════════════════════════════════════
COMMAND USAGE
────────────────────────────────────────────────────────────────────────────────
SEARCH MODE (NEW):
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
ORIGINAL MODE (UNCHANGED):
@1 | get-url
═══════════════════════════════════════════════════════════════════════════════
PRACTICAL EXAMPLES
────────────────────────────────────────────────────────────────────────────────
1. Find all YouTube video URLs:
$ get-url -url "youtube.com*"
Results show all files with YouTube URLs
2. Find specific video by URL:
$ get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
Searches using the normalized pattern: youtube.com/watch?v=xx_88tdwmes
3. Find by domain:
$ get-url -url "google.com"
Matches: google.com, www.google.com/search, google.com/maps
4. Find by subdomain pattern:
$ get-url -url "*.example.com*"
Matches: cdn.example.com, api.example.com, www.example.com
5. Find by path pattern:
$ get-url -url "youtube.com/watch*"
Matches: youtube.com/watch?v=123 (NOT youtube.com/shorts/abc)
═══════════════════════════════════════════════════════════════════════════════
FILES MODIFIED / CREATED
────────────────────────────────────────────────────────────────────────────────
MAIN IMPLEMENTATION:
✓ cmdlet/get_url.py
- Added: _normalize_url_for_search() method
- Added: _match_url_pattern() method
- Added: _search_urls_across_stores() method
- Modified: run() method to handle -url flag
- Lines: 281 total (was 127)
DOCUMENTATION:
✓ docs/GET_URL_SEARCH.md - Full feature documentation
✓ docs/GET_URL_QUICK_REF.md - Quick reference guide
✓ ENHANCEMENT_SUMMARY.md - Technical summary
TESTING:
✓ test_get_url_search.py - Comprehensive test suite
- URL normalization tests: 6/6 passed ✓
- Pattern matching tests: 9/9 passed ✓
═══════════════════════════════════════════════════════════════════════════════
IMPLEMENTATION DETAILS
────────────────────────────────────────────────────────────────────────────────
NEW METHODS (Static):
_normalize_url_for_search(url: str) -> str
Strips protocol and www prefix, returns lowercase
Examples:
"https://www.youtube.com/watch?v=xx" → "youtube.com/watch?v=xx"
"http://www.google.com" → "google.com"
"ftp://files.example.com" → "files.example.com"
_match_url_pattern(url: str, pattern: str) -> bool
Normalizes both URL and pattern, uses fnmatch for wildcard matching
Returns True if URL matches pattern, False otherwise
NEW METHODS (Instance):
_search_urls_across_stores(pattern: str, config: Dict) -> Tuple[List[UrlItem], List[str]]
Searches all stores for matching URLs
Returns: (matched_items, stores_searched)
MODIFIED METHOD:
run(result, args, config) -> int
Now handles:
1. If -url flag provided: Search mode
2. Otherwise: Original mode (hash+store lookup)
Maintains full backward compatibility
═══════════════════════════════════════════════════════════════════════════════
BACKWARD COMPATIBILITY
────────────────────────────────────────────────────────────────────────────────
✓ FULLY COMPATIBLE
- Original usage: @1 | get-url (unchanged)
- -query flag: Still works for hash lookups
- -store flag: Still required for direct lookups
- Return codes: Unchanged (0 = success, 1 = not found/error)
═══════════════════════════════════════════════════════════════════════════════
TEST RESULTS
────────────────────────────────────────────────────────────────────────────────
All 15 tests passed ✓
URL Normalization (6 tests):
✓ https://www.youtube.com/watch?v=xx_88TDWmEs
✓ http://www.google.com
✓ ftp://files.example.com/path
✓ HTTPS://WWW.EXAMPLE.COM
✓ www.example.com
✓ example.com
Pattern Matching (9 tests):
✓ youtube.com* matches youtube.com/watch
✓ youtube.com/watch* matches youtube.com/watch?v=123
✓ youtube.com/shorts* does NOT match youtube.com/watch?v=123
✓ google.com matches google.com
✓ google.com* matches google.com/search
✓ *.example.com* matches cdn.example.com
✓ *example.com* matches cdn.example.com
✓ example.com does NOT match example.org
✓ reddit.com* matches reddit.com/r/videos
═══════════════════════════════════════════════════════════════════════════════
NEXT STEPS (OPTIONAL)
────────────────────────────────────────────────────────────────────────────────
Future enhancements could include:
1. Performance optimization: Cache results from stores
2. Regex support: --regex flag for complex patterns
3. Limit flag: --limit N to cap results
4. Filter by store: --store NAME to search specific stores only
5. Exclude duplicates: --unique flag to deduplicate URLs
6. Export options: --json, --csv output formats
═══════════════════════════════════════════════════════════════════════════════
VERIFICATION
────────────────────────────────────────────────────────────────────────────────
✓ Python syntax: Valid (py_compile passed)
✓ Imports: All dependencies available
✓ Command registration: Successful
✓ Test suite: All 15 tests pass
✓ Backward compatibility: Fully maintained
✓ Error handling: Graceful with stderr logging
✓ Documentation: Complete with examples
═══════════════════════════════════════════════════════════════════════════════
READY FOR PRODUCTION ✓
The get-url command is now ready to use for URL searching across all stores
with intelligent pattern matching and normalization.
Usage:
get-url -url "www.google.com"
get-url -url "youtube.com*"
get-url -url "*.example.com*"
═══════════════════════════════════════════════════════════════════════════════
+60 -17
View File
@@ -150,7 +150,9 @@ def _osd_overlay_set_ass(client: MPVIPCClient, ass_text: str) -> Optional[dict]:
def _osd_overlay_clear(client: MPVIPCClient) -> None:
client.send_command({"command": {"name": "osd-overlay", "id": _LYRIC_OSD_OVERLAY_ID, "format": "none"}})
client.send_command(
{"command": {"name": "osd-overlay", "id": _LYRIC_OSD_OVERLAY_ID, "format": "none"}}
)
def _log(msg: str) -> None:
@@ -181,6 +183,8 @@ def _ipc_get_property(
if resp and resp.get("error") == "success":
return resp.get("data", default)
return default
def _http_get_json(url: str, *, timeout_s: float = 10.0) -> Optional[dict]:
try:
req = Request(
@@ -262,7 +266,9 @@ def _wrap_plain_lyrics_as_lrc(text: str) -> str:
return "\n".join(out) + "\n"
def _fetch_lrclib(*, artist: Optional[str], title: Optional[str], duration_s: Optional[float] = None) -> Optional[str]:
def _fetch_lrclib(
*, artist: Optional[str], title: Optional[str], duration_s: Optional[float] = None
) -> Optional[str]:
base = "https://lrclib.net/api"
# Require both artist and title; title-only lookups cause frequent mismatches.
@@ -506,7 +512,9 @@ def _write_temp_sub_file(*, key: str, text: str) -> Path:
tmp_dir.mkdir(parents=True, exist_ok=True)
ext = _infer_sub_extension(text)
digest = hashlib.sha1((key + "\n" + (text or "")).encode("utf-8", errors="ignore")).hexdigest()[:16]
digest = hashlib.sha1((key + "\n" + (text or "")).encode("utf-8", errors="ignore")).hexdigest()[
:16
]
safe_key = hashlib.sha1((key or "").encode("utf-8", errors="ignore")).hexdigest()[:12]
path = (tmp_dir / f"sub-{safe_key}-{digest}{ext}").resolve()
path.write_text(text or "", encoding="utf-8", errors="replace")
@@ -747,7 +755,9 @@ def _infer_store_for_target(*, target: str, config: dict) -> Optional[str]:
continue
root = None
try:
root = getattr(backend, "_location", None) or getattr(backend, "location", lambda: None)()
root = (
getattr(backend, "_location", None) or getattr(backend, "location", lambda: None)()
)
except Exception:
root = None
if not root:
@@ -816,7 +826,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
while True:
try:
# Toggle support (mpv Lua script sets this property; default to visible).
visible_raw = _ipc_get_property(client, _LYRIC_VISIBLE_PROP, True, raise_on_disconnect=True)
visible_raw = _ipc_get_property(
client, _LYRIC_VISIBLE_PROP, True, raise_on_disconnect=True
)
raw_path = _ipc_get_property(client, "path", None, raise_on_disconnect=True)
except ConnectionError:
try:
@@ -915,7 +927,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
if is_http:
# HTTP/HTTPS targets are only valid if they map to a store backend.
store_from_url = _extract_store_from_url_target(target)
store_name = store_from_url or _infer_hydrus_store_from_url_target(target=target, config=cfg)
store_name = store_from_url or _infer_hydrus_store_from_url_target(
target=target, config=cfg
)
if not store_name:
_log("HTTP target has no store mapping; lyrics disabled")
current_store_name = None
@@ -962,7 +976,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
except Exception:
meta = None
if meta is None:
_log(f"HTTP target not found in store DB (store={store_name!r} hash={current_file_hash}); lyrics disabled")
_log(
f"HTTP target not found in store DB (store={store_name!r} hash={current_file_hash}); lyrics disabled"
)
current_store_name = None
current_backend = None
current_key = None
@@ -988,9 +1004,15 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
file_hash=current_file_hash,
config=cfg,
)
current_key = f"{current_store_name}:{current_file_hash}" if current_store_name and current_file_hash else None
current_key = (
f"{current_store_name}:{current_file_hash}"
if current_store_name and current_file_hash
else None
)
_log(f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid={bool(current_key)}")
_log(
f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid={bool(current_key)}"
)
if not current_key or not current_backend:
current_store_name = None
@@ -1010,7 +1032,13 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
# Load/reload lyrics when we have a resolvable key and it differs from what we loaded.
# This is important for the autofetch path: the note can appear without the mpv target changing.
if current_key and current_key != last_loaded_key and current_store_name and current_file_hash and current_backend:
if (
current_key
and current_key != last_loaded_key
and current_store_name
and current_file_hash
and current_backend
):
notes: Dict[str, str] = {}
try:
notes = current_backend.get_note(current_file_hash, config=cfg) or {}
@@ -1018,7 +1046,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
notes = {}
try:
_log(f"Loaded notes keys: {sorted([str(k) for k in notes.keys()]) if isinstance(notes, dict) else 'N/A'}")
_log(
f"Loaded notes keys: {sorted([str(k) for k in notes.keys()]) if isinstance(notes, dict) else 'N/A'}"
)
except Exception:
_log("Loaded notes keys: <error>")
@@ -1062,7 +1092,11 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
# Throttle attempts per key to avoid hammering APIs.
autofetch_enabled = bool(cfg.get("lyric_autofetch", True))
now = time.time()
if autofetch_enabled and current_key != last_fetch_attempt_key and (now - last_fetch_attempt_at) > 2.0:
if (
autofetch_enabled
and current_key != last_fetch_attempt_key
and (now - last_fetch_attempt_at) > 2.0
):
last_fetch_attempt_key = current_key
last_fetch_attempt_at = now
@@ -1082,7 +1116,9 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
except Exception:
pass
_log(f"Autofetch query artist={artist!r} title={title!r} duration={duration_s!r}")
_log(
f"Autofetch query artist={artist!r} title={title!r} duration={duration_s!r}"
)
if not artist or not title:
_log("Autofetch skipped: requires both artist and title")
@@ -1091,13 +1127,19 @@ def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] =
fetched = _fetch_lrclib(
artist=artist,
title=title,
duration_s=float(duration_s) if isinstance(duration_s, (int, float)) else None,
duration_s=(
float(duration_s) if isinstance(duration_s, (int, float)) else None
),
)
if not fetched or not fetched.strip():
fetched = _fetch_lyrics_ovh(artist=artist, title=title)
if fetched and fetched.strip():
try:
ok = bool(current_backend.set_note(current_file_hash, "lyric", fetched, config=cfg))
ok = bool(
current_backend.set_note(
current_file_hash, "lyric", fetched, config=cfg
)
)
_log(f"Autofetch stored lyric note ok={ok}")
# Next loop iteration will re-load the note.
except Exception as exc:
@@ -1187,7 +1229,9 @@ def run_overlay(*, mpv: MPV, entries: List[LrcLine], poll_s: float = 0.15) -> in
client = mpv.client()
if not client.connect():
print("mpv IPC is not reachable (is mpv running with --input-ipc-server?).", file=sys.stderr)
print(
"mpv IPC is not reachable (is mpv running with --input-ipc-server?).", file=sys.stderr
)
return 3
while True:
@@ -1240,7 +1284,6 @@ def run_overlay(*, mpv: MPV, entries: List[LrcLine], poll_s: float = 0.15) -> in
time.sleep(poll_s)
def main(argv: Optional[List[str]] = None) -> int:
parser = argparse.ArgumentParser(prog="python -m MPV.lyric", add_help=True)
parser.add_argument(
+64 -19
View File
@@ -133,9 +133,7 @@ def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]:
# Use CIM to query command lines; output as JSON for robust parsing.
# Note: `ConvertTo-Json` returns a number for single item, array for many, or null.
ps_script = (
"$ipc = "
+ json.dumps(ipc_path)
+ "; "
"$ipc = " + json.dumps(ipc_path) + "; "
"Get-CimInstance Win32_Process | "
"Where-Object { $_.CommandLine -and $_.CommandLine -match ' -m\\s+MPV\\.lyric(\\s|$)' -and $_.CommandLine -match ('--ipc\\s+' + [regex]::Escape($ipc)) } | "
"Select-Object -ExpandProperty ProcessId | ConvertTo-Json -Compress"
@@ -201,6 +199,7 @@ def _windows_kill_pids(pids: List[int]) -> None:
class MPVIPCError(Exception):
"""Raised when MPV IPC communication fails."""
pass
@@ -248,7 +247,9 @@ class MPV:
finally:
client.disconnect()
def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]:
def send(
self, command: Dict[str, Any] | List[Any], silent: bool = False
) -> Optional[Dict[str, Any]]:
client = self.client(silent=bool(silent))
try:
if not client.connect():
@@ -308,7 +309,7 @@ class MPV:
pass
def _q(s: str) -> str:
return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"'
return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'
pipeline = f"download-media -url {_q(url)} -format {_q(fmt)}"
if store:
@@ -329,10 +330,18 @@ class MPV:
"pipeline": pipeline,
}
except Exception as exc:
return {"success": False, "stdout": "", "stderr": "", "error": f"{type(exc).__name__}: {exc}", "pipeline": pipeline}
return {
"success": False,
"stdout": "",
"stderr": "",
"error": f"{type(exc).__name__}: {exc}",
"pipeline": pipeline,
}
def get_playlist(self, silent: bool = False) -> Optional[List[Dict[str, Any]]]:
resp = self.send({"command": ["get_property", "playlist"], "request_id": 100}, silent=silent)
resp = self.send(
{"command": ["get_property", "playlist"], "request_id": 100}, silent=silent
)
if resp is None:
return None
if resp.get("error") == "success":
@@ -467,7 +476,11 @@ class MPV:
env["PYTHONUNBUFFERED"] = "1"
try:
existing_pp = env.get("PYTHONPATH")
env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
env["PYTHONPATH"] = (
str(repo_root)
if not existing_pp
else (str(repo_root) + os.pathsep + str(existing_pp))
)
except Exception:
pass
kwargs["env"] = env
@@ -486,7 +499,13 @@ class MPV:
except Exception:
flags |= 0x08000000
kwargs["creationflags"] = flags
kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
_LYRIC_PROCESS = subprocess.Popen(cmd, **kwargs)
debug(f"Lyric loader started (log={log_path})")
@@ -608,10 +627,22 @@ class MPV:
flags |= 0x08000000
kwargs["creationflags"] = flags
# startupinfo is harmless for GUI apps; helps hide flashes for console-subsystem builds.
kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
debug("Starting MPV")
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
subprocess.Popen(
cmd,
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
**kwargs,
)
# Start the persistent pipeline helper eagerly so MPV Lua can issue
# non-blocking requests (e.g., format list prefetch) without needing
@@ -634,7 +665,11 @@ class MPV:
helper_env = os.environ.copy()
try:
existing_pp = helper_env.get("PYTHONPATH")
helper_env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
helper_env["PYTHONPATH"] = (
str(repo_root)
if not existing_pp
else (str(repo_root) + os.pathsep + str(existing_pp))
)
except Exception:
pass
@@ -650,7 +685,13 @@ class MPV:
except Exception:
flags |= 0x08000000
helper_kwargs["creationflags"] = flags
helper_kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"})
helper_kwargs.update(
{
k: v
for k, v in _windows_hidden_subprocess_kwargs().items()
if k != "creationflags"
}
)
helper_kwargs["cwd"] = str(repo_root)
helper_kwargs["env"] = helper_env
@@ -695,7 +736,7 @@ def _unwrap_memory_target(text: Optional[str]) -> Optional[str]:
return text
for line in text.splitlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('memory://'):
if not line or line.startswith("#") or line.startswith("memory://"):
continue
return line
return text
@@ -708,7 +749,9 @@ class MPVIPCClient:
It handles platform-specific differences (Windows named pipes vs Unix sockets).
"""
def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False):
def __init__(
self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False
):
"""Initialize MPV IPC client.
Args:
@@ -838,7 +881,7 @@ class MPVIPCClient:
# Windows named pipes
try:
# Try to open the named pipe
self.sock = open(self.socket_path, 'r+b', buffering=0)
self.sock = open(self.socket_path, "r+b", buffering=0)
return True
except (OSError, IOError) as exc:
if not self.silent:
@@ -896,6 +939,7 @@ class MPVIPCClient:
# Debug: log the command being sent
from SYS.logger import debug as _debug
_debug(f"[IPC] Sending: {payload.strip()}")
# Send command
@@ -914,13 +958,15 @@ class MPVIPCClient:
break
try:
lines = response_data.decode('utf-8', errors='replace').strip().split('\n')
lines = response_data.decode("utf-8", errors="replace").strip().split("\n")
for line in lines:
if not line: continue
if not line:
continue
resp = json.loads(line)
# Debug: log responses
from SYS.logger import debug as _debug
_debug(f"[IPC] Received: {line}")
# Check if this is the response to our request
@@ -968,4 +1014,3 @@ class MPVIPCClient:
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit: disconnect from the IPC endpoint.

    ``self.disconnect()`` is called whether or not the ``with`` body raised.
    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block is propagated, not suppressed.
    """
    self.disconnect()
+11 -8
View File
@@ -32,8 +32,7 @@ def setup_logging(log_file: Optional[Path] = None) -> logging.Logger:
handler = logging.StreamHandler(sys.stderr)
formatter = logging.Formatter(
"[%(asctime)s][%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
"[%(asctime)s][%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
@@ -89,13 +88,13 @@ def execute_pipeline(
capture_output=True,
text=True,
cwd=str(_ROOT_DIR),
env={**dict(__import__('os').environ), "MEDEIA_MPV_CALLER": "lua"},
env={**dict(__import__("os").environ), "MEDEIA_MPV_CALLER": "lua"},
)
if log_file:
log_to_helper(
f"[api] result returncode={result.returncode} len_stdout={len(result.stdout or '')} len_stderr={len(result.stderr or '')}",
log_file
log_file,
)
if result.stderr:
log_to_helper(f"[api] stderr: {result.stderr[:500]}", log_file)
@@ -146,16 +145,20 @@ def handle_api_request(request_json: str, log_file: Optional[Path] = None) -> st
return json.dumps(result)
else:
return json.dumps({
return json.dumps(
{
"success": False,
"error": f"Unknown command: {cmd}",
})
}
)
except Exception as exc:
return json.dumps({
return json.dumps(
{
"success": False,
"error": f"{type(exc).__name__}: {exc}",
})
}
)
if __name__ == "__main__":
+58 -19
View File
@@ -17,6 +17,7 @@ Protocol (user-data properties):
This helper is intentionally minimal: one request at a time, last-write-wins.
"""
from __future__ import annotations
MEDEIA_MPV_HELPER_VERSION = "2025-12-19"
@@ -73,8 +74,6 @@ READY_PROP = "user-data/medeia-pipeline-ready"
OBS_ID_REQUEST = 1001
def _run_pipeline(pipeline_text: str, *, seeds: Any = None) -> Dict[str, Any]:
# Import after sys.path fix.
from TUI.pipeline_runner import PipelineExecutor # noqa: WPS433
@@ -181,7 +180,13 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
except Exception:
pass
cmd = [py, str((_repo_root() / "CLI.py").resolve()), "pipeline", "--pipeline", pipeline_text]
cmd = [
py,
str((_repo_root() / "CLI.py").resolve()),
"pipeline",
"--pipeline",
pipeline_text,
]
if seeds is not None:
try:
cmd.extend(["--seeds-json", json.dumps(seeds, ensure_ascii=False)])
@@ -265,7 +270,7 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
name = None
if isinstance(instance_cfg, dict):
name = instance_cfg.get("NAME") or instance_cfg.get("name")
candidate = (str(name or instance_key or "").strip())
candidate = str(name or instance_key or "").strip()
if candidate:
seen.add(candidate)
choices = sorted(seen)
@@ -419,7 +424,11 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
if dump and dump != "0" and isinstance(info, dict):
h = hashlib.sha1(url.encode("utf-8", errors="replace")).hexdigest()[:10]
out_path = _repo_root() / "Log" / f"ytdlp-probe-{h}.json"
out_path.write_text(json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8", errors="replace")
out_path.write_text(
json.dumps(info, ensure_ascii=False, indent=2),
encoding="utf-8",
errors="replace",
)
_append_helper_log(f"[ytdlp-formats] wrote probe json: {out_path}")
except Exception:
pass
@@ -610,7 +619,7 @@ def main(argv: Optional[list[str]] = None) -> int:
if debug_enabled:
logging.basicConfig(
level=logging.DEBUG,
format='[%(name)s] %(levelname)s: %(message)s',
format="[%(name)s] %(levelname)s: %(message)s",
stream=sys.stderr,
)
for noisy in ("httpx", "httpcore", "httpcore.http11", "httpcore.connection"):
@@ -628,13 +637,17 @@ def main(argv: Optional[list[str]] = None) -> int:
# Ensure single helper instance per ipc.
_lock_fh = _acquire_ipc_lock(str(args.ipc))
if _lock_fh is None:
_append_helper_log(f"[helper] another instance already holds lock for ipc={args.ipc}; exiting")
_append_helper_log(
f"[helper] another instance already holds lock for ipc={args.ipc}; exiting"
)
return 0
try:
_append_helper_log(f"[helper] version={MEDEIA_MPV_HELPER_VERSION} started ipc={args.ipc}")
try:
_append_helper_log(f"[helper] file={Path(__file__).resolve()} cwd={Path.cwd().resolve()}")
_append_helper_log(
f"[helper] file={Path(__file__).resolve()} cwd={Path.cwd().resolve()}"
)
except Exception:
pass
try:
@@ -651,6 +664,7 @@ def main(argv: Optional[list[str]] = None) -> int:
# Route SYS.logger output into the helper log file so diagnostics are not
# lost in mpv's console/terminal output.
try:
class _HelperLogStream:
def __init__(self) -> None:
self._pending = ""
@@ -726,7 +740,9 @@ def main(argv: Optional[list[str]] = None) -> int:
last_connect_error = f"{type(exc).__name__}: {exc}"
if time.time() > connect_deadline:
_append_helper_log(f"[helper] failed to connect ipc={args.ipc} error={last_connect_error or 'timeout'}")
_append_helper_log(
f"[helper] failed to connect ipc={args.ipc} error={last_connect_error or 'timeout'}"
)
return 2
# Keep trying.
@@ -789,18 +805,32 @@ def main(argv: Optional[list[str]] = None) -> int:
# can read immediately without waiting for a request/response cycle (which may timeout).
try:
startup_choices_payload = _run_op("store-choices", None)
startup_choices = startup_choices_payload.get("choices") if isinstance(startup_choices_payload, dict) else None
startup_choices = (
startup_choices_payload.get("choices")
if isinstance(startup_choices_payload, dict)
else None
)
if isinstance(startup_choices, list):
preview = ", ".join(str(x) for x in startup_choices[:50])
_append_helper_log(f"[helper] startup store-choices count={len(startup_choices)} items={preview}")
_append_helper_log(
f"[helper] startup store-choices count={len(startup_choices)} items={preview}"
)
# Publish to a cached property for Lua to read without IPC request.
try:
cached_json = json.dumps({"success": True, "choices": startup_choices}, ensure_ascii=False)
client.send_command_no_wait(["set_property_string", "user-data/medeia-store-choices-cached", cached_json])
_append_helper_log(f"[helper] published store-choices to user-data/medeia-store-choices-cached")
cached_json = json.dumps(
{"success": True, "choices": startup_choices}, ensure_ascii=False
)
client.send_command_no_wait(
["set_property_string", "user-data/medeia-store-choices-cached", cached_json]
)
_append_helper_log(
f"[helper] published store-choices to user-data/medeia-store-choices-cached"
)
except Exception as exc:
_append_helper_log(f"[helper] failed to publish store-choices: {type(exc).__name__}: {exc}")
_append_helper_log(
f"[helper] failed to publish store-choices: {type(exc).__name__}: {exc}"
)
else:
_append_helper_log("[helper] startup store-choices unavailable")
except Exception as exc:
@@ -809,11 +839,16 @@ def main(argv: Optional[list[str]] = None) -> int:
# Also publish config temp directory if available
try:
from config import load_config
cfg = load_config()
temp_dir = cfg.get("temp", "").strip() or os.getenv("TEMP") or "/tmp"
if temp_dir:
client.send_command_no_wait(["set_property_string", "user-data/medeia-config-temp", temp_dir])
_append_helper_log(f"[helper] published config temp to user-data/medeia-config-temp={temp_dir}")
client.send_command_no_wait(
["set_property_string", "user-data/medeia-config-temp", temp_dir]
)
_append_helper_log(
f"[helper] published config temp to user-data/medeia-config-temp={temp_dir}"
)
except Exception as exc:
_append_helper_log(f"[helper] failed to publish config temp: {type(exc).__name__}: {exc}")
@@ -854,7 +889,9 @@ def main(argv: Optional[list[str]] = None) -> int:
if "quic" in lower_prefix and "DEBUG:" in text:
continue
# Suppress progress-bar style lines (keep true errors).
if ("ETA" in text or "%" in text) and ("ERROR:" not in text and "WARNING:" not in text):
if ("ETA" in text or "%" in text) and (
"ERROR:" not in text and "WARNING:" not in text
):
# Typical yt-dlp progress bar line.
if text.lstrip().startswith("["):
continue
@@ -966,7 +1003,9 @@ def main(argv: Optional[list[str]] = None) -> int:
try:
# IMPORTANT: don't wait for a response here; waiting would consume
# async events and can drop/skip property-change notifications.
client.send_command_no_wait(["set_property_string", RESPONSE_PROP, json.dumps(resp, ensure_ascii=False)])
client.send_command_no_wait(
["set_property_string", RESPONSE_PROP, json.dumps(resp, ensure_ascii=False)]
)
except Exception:
# If posting results fails, there's nothing more useful to do.
pass
+58 -27
View File
@@ -95,12 +95,18 @@ class AllDebrid(Provider):
return None
# Quiet mode when download-file is mid-pipeline.
quiet = bool(self.config.get("_quiet_background_output")) if isinstance(self.config, dict) else False
quiet = (
bool(self.config.get("_quiet_background_output"))
if isinstance(self.config, dict)
else False
)
unlocked_url = target
try:
unlocked = client.unlock_link(target)
if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
if isinstance(unlocked, str) and unlocked.strip().startswith(
("http://", "https://")
):
unlocked_url = unlocked.strip()
except Exception as exc:
# Fall back to the raw link, but warn.
@@ -136,7 +142,11 @@ class AllDebrid(Provider):
try:
if downloaded_path.exists():
size = downloaded_path.stat().st_size
if size > 0 and size <= 250_000 and downloaded_path.suffix.lower() not in (".html", ".htm"):
if (
size > 0
and size <= 250_000
and downloaded_path.suffix.lower() not in (".html", ".htm")
):
head = downloaded_path.read_bytes()[:512]
try:
text = head.decode("utf-8", errors="ignore").lower()
@@ -147,7 +157,10 @@ class AllDebrid(Provider):
downloaded_path.unlink()
except Exception:
pass
log("[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.", file=sys.stderr)
log(
"[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
file=sys.stderr,
)
return None
except Exception:
pass
@@ -160,7 +173,9 @@ class AllDebrid(Provider):
return None
@staticmethod
def _flatten_files(items: Any, *, _prefix: Optional[List[str]] = None) -> Iterable[Dict[str, Any]]:
def _flatten_files(
items: Any, *, _prefix: Optional[List[str]] = None
) -> Iterable[Dict[str, Any]]:
"""Flatten AllDebrid magnet file tree into file dicts, preserving relative paths.
API commonly returns:
@@ -185,17 +200,17 @@ class AllDebrid(Provider):
if not isinstance(node, dict):
continue
children = node.get('e') or node.get('children')
children = node.get("e") or node.get("children")
if isinstance(children, list):
folder_name = node.get('n') or node.get('name')
folder_name = node.get("n") or node.get("name")
next_prefix = prefix
if isinstance(folder_name, str) and folder_name.strip():
next_prefix = prefix + [folder_name.strip()]
yield from AllDebrid._flatten_files(children, _prefix=next_prefix)
continue
name = node.get('n') or node.get('name')
link = node.get('l') or node.get('link')
name = node.get("n") or node.get("name")
link = node.get("l") or node.get("link")
if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
rel_parts = prefix + [name.strip()]
relpath = "/".join([p for p in rel_parts if p])
@@ -253,10 +268,15 @@ class AllDebrid(Provider):
except Exception:
magnet_status = {}
magnet_name = str(magnet_status.get('filename') or magnet_status.get('name') or magnet_status.get('hash') or f"magnet-{magnet_id}")
status_code = magnet_status.get('statusCode')
status_text = str(magnet_status.get('status') or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get('ready'))
magnet_name = str(
magnet_status.get("filename")
or magnet_status.get("name")
or magnet_status.get("hash")
or f"magnet-{magnet_id}"
)
status_code = magnet_status.get("statusCode")
status_text = str(magnet_status.get("status") or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get("ready"))
if not ready:
return [
@@ -280,18 +300,23 @@ class AllDebrid(Provider):
try:
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
file_tree = magnet_files.get('files', []) if isinstance(magnet_files, dict) else []
magnet_files = (
files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
)
file_tree = magnet_files.get("files", []) if isinstance(magnet_files, dict) else []
except Exception as exc:
log(f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}", file=sys.stderr)
log(
f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}",
file=sys.stderr,
)
file_tree = []
results: List[SearchResult] = []
for file_node in self._flatten_files(file_tree):
file_name = str(file_node.get('n') or file_node.get('name') or '').strip()
file_url = str(file_node.get('l') or file_node.get('link') or '').strip()
relpath = str(file_node.get('_relpath') or file_name or '').strip()
file_size = file_node.get('s') or file_node.get('size')
file_name = str(file_node.get("n") or file_node.get("name") or "").strip()
file_url = str(file_node.get("l") or file_node.get("link") or "").strip()
relpath = str(file_node.get("_relpath") or file_name or "").strip()
file_size = file_node.get("s") or file_node.get("size")
if not file_name or not file_url:
continue
@@ -356,16 +381,21 @@ class AllDebrid(Provider):
continue
try:
magnet_id = int(magnet.get('id'))
magnet_id = int(magnet.get("id"))
except Exception:
continue
magnet_name = str(magnet.get('filename') or magnet.get('name') or magnet.get('hash') or f"magnet-{magnet_id}")
magnet_name = str(
magnet.get("filename")
or magnet.get("name")
or magnet.get("hash")
or f"magnet-{magnet_id}"
)
magnet_name_lower = magnet_name.lower()
status_text = str(magnet.get('status') or "").strip() or "unknown"
status_code = magnet.get('statusCode')
ready = status_code == 4 or bool(magnet.get('ready'))
status_text = str(magnet.get("status") or "").strip() or "unknown"
status_code = magnet.get("statusCode")
ready = status_code == 4 or bool(magnet.get("ready"))
if wanted_id is not None:
if magnet_id != wanted_id:
@@ -375,7 +405,7 @@ class AllDebrid(Provider):
size_bytes: Optional[int] = None
try:
size_val = magnet.get('size')
size_val = magnet.get("size")
if isinstance(size_val, (int, float)):
size_bytes = int(size_val)
elif isinstance(size_val, str) and size_val.isdigit():
@@ -392,7 +422,8 @@ class AllDebrid(Provider):
annotations=["folder"],
media_kind="folder",
size_bytes=size_bytes,
tag={"alldebrid", "folder", str(magnet_id)} | ({"ready"} if ready else {"not-ready"}),
tag={"alldebrid", "folder", str(magnet_id)}
| ({"ready"} if ready else {"not-ready"}),
columns=[
("Folder", magnet_name),
("ID", str(magnet_id)),
+23 -7
View File
@@ -38,7 +38,9 @@ class Bandcamp(Provider):
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
def _scrape_artist_page(self, page: Any, artist_url: str, limit: int = 50) -> List[SearchResult]:
def _scrape_artist_page(
self, page: Any, artist_url: str, limit: int = 50
) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
@@ -74,13 +76,15 @@ class Bandcamp(Provider):
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title") or item.query_selector(".title")
title = (title_node.inner_text().strip() if title_node else "")
title = title_node.inner_text().strip() if title_node else ""
if title:
title = " ".join(title.split())
if not title:
title = target.rsplit("/", 1)[-1]
kind = "album" if "/album/" in target else ("track" if "/track/" in target else "item")
kind = (
"album" if "/album/" in target else ("track" if "/track/" in target else "item")
)
results.append(
SearchResult(
@@ -107,7 +111,9 @@ class Bandcamp(Provider):
return results
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Handle Bandcamp `@N` selection.
If the selected item is an ARTIST result, selecting it auto-expands into
@@ -155,12 +161,16 @@ class Bandcamp(Provider):
continue
title = str(payload.get("title") or "").strip()
url_val = str(payload.get("url") or payload.get("path") or meta.get("url") or "").strip()
url_val = str(
payload.get("url") or payload.get("path") or meta.get("url") or ""
).strip()
base = self._base_url(url_val)
if not base:
continue
chosen.append({"title": title, "url": base, "location": str(meta.get("artist") or "").strip()})
chosen.append(
{"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}
)
if not chosen:
return False
@@ -198,7 +208,13 @@ class Bandcamp(Provider):
try:
results_payload.append(r.to_dict())
except Exception:
results_payload.append({"table": "bandcamp", "title": getattr(r, "title", ""), "path": getattr(r, "path", "")})
results_payload.append(
{
"table": "bandcamp",
"title": getattr(r, "title", ""),
"path": getattr(r, "path", ""),
}
)
try:
ctx.set_last_result_table(table, results_payload)
+18 -6
View File
@@ -76,7 +76,9 @@ class FileIO(Provider):
data: Dict[str, Any] = {}
expires = kwargs.get("expires", self._default_expires)
max_downloads = kwargs.get("maxDownloads", kwargs.get("max_downloads", self._default_max_downloads))
max_downloads = kwargs.get(
"maxDownloads", kwargs.get("max_downloads", self._default_max_downloads)
)
auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))
if expires not in (None, ""):
@@ -112,7 +114,9 @@ class FileIO(Provider):
if response.status_code >= 400:
location = response.headers.get("location") or response.headers.get("Location")
ct = response.headers.get("content-type") or response.headers.get("Content-Type")
raise Exception(f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}")
raise Exception(
f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
)
payload: Any
try:
@@ -122,9 +126,13 @@ class FileIO(Provider):
# If the server ignored our Accept header and returned HTML, this is almost
# certainly the wrong endpoint or an upstream block.
ct = (response.headers.get("content-type") or response.headers.get("Content-Type") or "").lower()
ct = (
response.headers.get("content-type") or response.headers.get("Content-Type") or ""
).lower()
if (payload is None) and ("text/html" in ct):
raise Exception("file.io returned HTML instead of JSON; expected API response from /upload")
raise Exception(
"file.io returned HTML instead of JSON; expected API response from /upload"
)
if isinstance(payload, dict) and payload.get("success") is False:
reason = payload.get("message") or payload.get("error") or payload.get("status")
@@ -149,14 +157,18 @@ class FileIO(Provider):
snippet = snippet[:300] + "..."
except Exception:
snippet = "<unreadable response>"
raise Exception(f"Upload succeeded but response did not include a link (response: {snippet})")
raise Exception(
f"Upload succeeded but response did not include a link (response: {snippet})"
)
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, uploaded_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
except Exception:
pass
+13 -4
View File
@@ -110,7 +110,12 @@ def is_download_file_url(url: str) -> bool:
if not host.endswith("archive.org"):
return False
# /download/<identifier>/<filename>
return len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip()) and bool(parts[2].strip())
return (
len(parts) >= 3
and parts[0].lower() == "download"
and bool(parts[1].strip())
and bool(parts[2].strip())
)
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
@@ -482,12 +487,14 @@ class InternetArchive(Provider):
name = f.get("name")
if not name:
continue
files.append({
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
})
}
)
except Exception:
files = []
@@ -657,7 +664,9 @@ class InternetArchive(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, item_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, item_url
)
except Exception:
pass
+144 -34
View File
@@ -111,7 +111,19 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
score = 0
for ln in lines:
lo = ln.lower()
if ":" in ln and any(k in lo for k in ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")):
if ":" in ln and any(
k in lo
for k in (
"title",
"author",
"publisher",
"year",
"isbn",
"language",
"series",
"tags",
)
):
score += 1
if score > best_score:
best_score = score
@@ -260,7 +272,9 @@ def _prefer_isbn(isbns: List[str]) -> str:
return vals[0] if vals else ""
def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] = None) -> Tuple[List[str], str]:
def _enrich_book_tags_from_isbn(
isbn: str, *, config: Optional[Dict[str, Any]] = None
) -> Tuple[List[str], str]:
"""Return (tags, source_name) for the given ISBN.
Priority:
@@ -378,7 +392,9 @@ def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] =
return [], ""
def _fetch_libgen_details_html(url: str, *, timeout: Optional[Tuple[float, float]] = None) -> Optional[str]:
def _fetch_libgen_details_html(
url: str, *, timeout: Optional[Tuple[float, float]] = None
) -> Optional[str]:
try:
if timeout is None:
timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
@@ -450,7 +466,9 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
label = label[:-1].strip()
chunk_start = m.end()
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
)
raw_val_html = s[chunk_start:chunk_end]
# If we already have a value for this label from a table row, keep it.
@@ -600,7 +618,19 @@ def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author(s)", "authors", "author", "publisher", "year", "isbn", "language", "oclc/worldcat", "tags", "edition id"}:
if lk in {
"title",
"author(s)",
"authors",
"author",
"publisher",
"year",
"isbn",
"language",
"oclc/worldcat",
"tags",
"edition id",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -755,7 +785,15 @@ class Libgen(Provider):
if title and title.startswith("http"):
title = ""
base_name = sanitize_filename(title or md5 or (f"libgen_{_libgen_id_from_url(target)}" if _libgen_id_from_url(target) else "libgen"))
base_name = sanitize_filename(
title
or md5
or (
f"libgen_{_libgen_id_from_url(target)}"
if _libgen_id_from_url(target)
else "libgen"
)
)
out_path = output_dir / base_name
if extension:
out_path = out_path.with_suffix(f".{extension}")
@@ -782,14 +820,23 @@ class Libgen(Provider):
return
total = int(content_length) if content_length and content_length > 0 else None
downloaded = int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
downloaded = (
int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
)
elapsed = max(0.001, now - start_time)
speed = downloaded / elapsed
progress_bar.update(downloaded=downloaded, total=total, label=str(label or "download"), file=sys.stderr)
progress_bar.update(
downloaded=downloaded,
total=total,
label=str(label or "download"),
file=sys.stderr,
)
last_progress_time[0] = now
ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback)
ok, final_path = download_from_mirror(
target, out_path, progress_callback=progress_callback
)
progress_bar.finish()
if ok and final_path:
# After the download completes, best-effort fetch details metadata (title + ISBN)
@@ -802,9 +849,13 @@ class Libgen(Provider):
# Parse it post-download (best-effort) and do NOT perform external
# enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
if ("/ads.php" in low) or ("/get.php" in low):
ads_url = target if "/ads.php" in low else _libgen_ads_url_for_target(target)
ads_url = (
target if "/ads.php" in low else _libgen_ads_url_for_target(target)
)
if ads_url:
html = _fetch_libgen_details_html(ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0))
html = _fetch_libgen_details_html(
ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)
)
if html:
meta = _parse_libgen_ads_tags_html(html)
extracted_title = str(meta.get("title") or "").strip()
@@ -814,8 +865,12 @@ class Libgen(Provider):
if (not title) or title.startswith("http"):
title = extracted_title
authors = meta.get("authors") if isinstance(meta.get("authors"), list) else []
for a in (authors or []):
authors = (
meta.get("authors")
if isinstance(meta.get("authors"), list)
else []
)
for a in authors or []:
aa = str(a or "").strip()
if aa:
result.tag.add(f"author:{aa}")
@@ -835,15 +890,25 @@ class Libgen(Provider):
md["language"] = language
result.tag.add(f"language:{language}")
isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
isbns = [str(x).strip() for x in (isbns or []) if str(x).strip()]
isbns = (
meta.get("isbn")
if isinstance(meta.get("isbn"), list)
else []
)
isbns = [
str(x).strip() for x in (isbns or []) if str(x).strip()
]
if isbns:
md["isbn"] = isbns
for isbn_val in isbns:
result.tag.add(f"isbn:{isbn_val}")
free_tags = meta.get("tags") if isinstance(meta.get("tags"), list) else []
for t in (free_tags or []):
free_tags = (
meta.get("tags")
if isinstance(meta.get("tags"), list)
else []
)
for t in free_tags or []:
tt = str(t or "").strip()
if tt:
result.tag.add(tt)
@@ -853,7 +918,16 @@ class Libgen(Provider):
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author", "authors", "publisher", "year", "isbn", "language", "tags"}:
if lk in {
"title",
"author",
"authors",
"publisher",
"year",
"isbn",
"language",
"tags",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -863,7 +937,11 @@ class Libgen(Provider):
result.tag.add(f"libgen_{ns}:{vv}")
# Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
if ("/edition.php" in low) or ("/file.php" in low) or ("/series.php" in low):
if (
("/edition.php" in low)
or ("/file.php" in low)
or ("/series.php" in low)
):
html = _fetch_libgen_details_html(target)
if html:
meta = _parse_libgen_details_html(html)
@@ -874,8 +952,14 @@ class Libgen(Provider):
meta["edition_id"] = eid
extracted_title = str(meta.get("title") or "").strip()
extracted_isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
extracted_isbns = [str(x).strip() for x in (extracted_isbns or []) if str(x).strip()]
extracted_isbns = (
meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
)
extracted_isbns = [
str(x).strip()
for x in (extracted_isbns or [])
if str(x).strip()
]
if extracted_title:
md["title"] = extracted_title
@@ -955,9 +1039,11 @@ class LibgenSearch:
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
self.session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
}
)
def _search_libgen_json(
self,
@@ -1005,7 +1091,8 @@ class LibgenSearch:
download_link = f"http://library.lol/main/{md5}" if md5 else ""
results.append({
results.append(
{
"id": str(raw_id),
"title": str(title),
"author": str(author),
@@ -1018,7 +1105,8 @@ class LibgenSearch:
"md5": str(md5),
"mirror_url": download_link,
"cover": "",
})
}
)
if len(results) >= limit:
break
@@ -1063,7 +1151,9 @@ class LibgenSearch:
# Try JSON first on *all* mirrors (including .gl/.li), then fall back to HTML scraping.
results: List[Dict[str, Any]] = []
try:
results = self._search_libgen_json(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_json(
mirror, query, limit, timeout=request_timeout
)
except Exception:
results = []
@@ -1072,9 +1162,13 @@ class LibgenSearch:
continue
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_li(
mirror, query, limit, timeout=request_timeout
)
else:
results = self._search_libgen_rs(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_rs(
mirror, query, limit, timeout=request_timeout
)
if results:
_call(log_info, f"[libgen] Using mirror: {mirror}")
@@ -1477,28 +1571,40 @@ def _resolve_download_url(
# get.php?md5=... -> file response
# Handle edition -> file links.
m = re.search(r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle series -> edition links.
m = re.search(r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle file -> ads/get links (sometimes present as the "Libgen" mirror).
m = re.search(r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Prefer explicit get.php md5 links (most common successful chain).
m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
@@ -1540,7 +1646,9 @@ def _resolve_download_url(
_call(log_info, f"[resolve] Checking: {current_url}")
if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")):
if current_url.lower().endswith(
(".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")
):
return current_url
try:
@@ -1618,7 +1726,9 @@ def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Opt
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE)
match = re.search(
r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE
)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
+3 -1
View File
@@ -56,7 +56,9 @@ class LOC(Provider):
page_size = max(1, min(int(limit), 50))
while len(results) < max(0, int(limit)):
payload = client.search_chronicling_america(q, start=start, count=page_size, extra_params=extra)
payload = client.search_chronicling_america(
q, start=start, count=page_size, extra_params=extra
)
items = payload.get("results")
if not isinstance(items, list) or not items:
break
+81 -25
View File
@@ -30,7 +30,7 @@ def _sniff_mime_from_header(path: Path) -> Optional[str]:
return None
# Images
if header.startswith(b"\xFF\xD8\xFF"):
if header.startswith(b"\xff\xd8\xff"):
return "image/jpeg"
if header.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
@@ -57,7 +57,7 @@ def _sniff_mime_from_header(path: Path) -> Optional[str]:
# Video
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
return "video/x-msvideo"
if header.startswith(b"\x1A\x45\xDF\xA3"):
if header.startswith(b"\x1a\x45\xdf\xa3"):
# EBML container: Matroska/WebM.
return "video/x-matroska"
if len(header) >= 12 and header[4:8] == b"ftyp":
@@ -79,7 +79,9 @@ def _sniff_mime_from_header(path: Path) -> Optional[str]:
return None
def _classify_matrix_upload(path: Path, *, explicit_mime_type: Optional[str] = None) -> Tuple[str, str]:
def _classify_matrix_upload(
path: Path, *, explicit_mime_type: Optional[str] = None
) -> Tuple[str, str]:
"""Return (mime_type, msgtype) for Matrix uploads."""
mime_type = str(explicit_mime_type or "").strip() or None
@@ -111,8 +113,35 @@ def _classify_matrix_upload(path: Path, *, explicit_mime_type: Optional[str] = N
# Final fallback for unknown MIME types.
if msgtype == "m.file":
audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv", ".m2ts", ".mts", ".3gp", ".ogv"}
audio_exts = {
".mp3",
".flac",
".wav",
".m4a",
".aac",
".ogg",
".opus",
".wma",
".mka",
".alac",
}
video_exts = {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
".m2ts",
".mts",
".3gp",
".ogv",
}
image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
@@ -133,7 +162,9 @@ def _normalize_homeserver(value: str) -> str:
return text.rstrip("/")
def _matrix_health_check(*, homeserver: str, access_token: Optional[str]) -> Tuple[bool, Optional[str]]:
def _matrix_health_check(
*, homeserver: str, access_token: Optional[str]
) -> Tuple[bool, Optional[str]]:
"""Lightweight Matrix reachability/auth validation.
- Always checks `/versions` (no auth).
@@ -150,7 +181,9 @@ def _matrix_health_check(*, homeserver: str, access_token: Optional[str]) -> Tup
if access_token:
headers = {"Authorization": f"Bearer {access_token}"}
resp = requests.get(f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5)
resp = requests.get(
f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5
)
if resp.status_code != 200:
return False, f"Authentication failed: {resp.status_code}"
@@ -167,7 +200,11 @@ class Matrix(Provider):
self._init_ok: Optional[bool] = None
self._init_reason: Optional[str] = None
matrix_conf = self.config.get("provider", {}).get("matrix", {}) if isinstance(self.config, dict) else {}
matrix_conf = (
self.config.get("provider", {}).get("matrix", {})
if isinstance(self.config, dict)
else {}
)
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
password = matrix_conf.get("password")
@@ -183,7 +220,9 @@ class Matrix(Provider):
cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
if cached is None:
ok, reason = _matrix_health_check(homeserver=str(homeserver), access_token=str(access_token) if access_token else None)
ok, reason = _matrix_health_check(
homeserver=str(homeserver), access_token=str(access_token) if access_token else None
)
_MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
else:
ok, reason = cached
@@ -286,6 +325,7 @@ class Matrix(Provider):
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from models import ProgressFileReader
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
@@ -298,7 +338,9 @@ class Matrix(Provider):
"Content-Type": "application/octet-stream",
}
mime_type, msgtype = _classify_matrix_upload(path, explicit_mime_type=kwargs.get("mime_type"))
mime_type, msgtype = _classify_matrix_upload(
path, explicit_mime_type=kwargs.get("mime_type")
)
headers["Content-Type"] = mime_type
filename = path.name
@@ -306,8 +348,12 @@ class Matrix(Provider):
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(handle, total_bytes=int(path.stat().st_size), label="upload")
resp = requests.post(upload_url, headers=headers, data=wrapped, params={"filename": filename})
wrapped = ProgressFileReader(
handle, total_bytes=int(path.stat().st_size), label="upload"
)
resp = requests.post(
upload_url, headers=headers, data=wrapped, params={"filename": filename}
)
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
@@ -320,7 +366,7 @@ class Matrix(Provider):
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://"):]
rest = curi[len("mxc://") :]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
@@ -343,7 +389,11 @@ class Matrix(Provider):
raise Exception(f"Matrix send message failed: {send_resp.text}")
event_id = (send_resp.json() or {}).get("event_id")
link = f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
link = (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
)
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
@@ -381,7 +431,11 @@ class Matrix(Provider):
raise Exception(f"Matrix send text failed: {send_resp.text}")
event_id = (send_resp.json() or {}).get("event_id")
return f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
return (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
)
def upload(self, file_path: str, **kwargs: Any) -> str:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
@@ -390,7 +444,9 @@ class Matrix(Provider):
raise Exception("Matrix room_id missing")
return self.upload_to_room(file_path, str(room_id))
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Handle Matrix room selection via `@N`.
If the CLI has a pending upload stash, selecting a room triggers an upload.
@@ -400,7 +456,7 @@ class Matrix(Provider):
pending = None
try:
pending = ctx.load_value('matrix_pending_uploads', default=None)
pending = ctx.load_value("matrix_pending_uploads", default=None)
except Exception:
pending = None
@@ -412,9 +468,9 @@ class Matrix(Provider):
for item in selected_items or []:
rid = None
if isinstance(item, dict):
rid = item.get('room_id') or item.get('id')
rid = item.get("room_id") or item.get("id")
else:
rid = getattr(item, 'room_id', None) or getattr(item, 'id', None)
rid = getattr(item, "room_id", None) or getattr(item, "id", None)
if rid and str(rid).strip():
room_ids.append(str(rid).strip())
@@ -426,15 +482,15 @@ class Matrix(Provider):
for room_id in room_ids:
for payload in pending_list:
try:
file_path = ''
file_path = ""
delete_after = False
pipe_obj = None
if isinstance(payload, dict):
file_path = str(payload.get('path') or '')
delete_after = bool(payload.get('delete_after', False))
pipe_obj = payload.get('pipe_obj')
file_path = str(payload.get("path") or "")
delete_after = bool(payload.get("delete_after", False))
pipe_obj = payload.get("pipe_obj")
else:
file_path = str(getattr(payload, 'path', '') or '')
file_path = str(getattr(payload, "path", "") or "")
if not file_path:
any_failed = True
continue
@@ -463,7 +519,7 @@ class Matrix(Provider):
print(f"Matrix upload failed: {exc}")
try:
ctx.store_value('matrix_pending_uploads', [])
ctx.store_value("matrix_pending_uploads", [])
except Exception:
pass
+27 -16
View File
@@ -100,7 +100,10 @@ class OpenLibraryMetadataProvider(MetadataProvider):
try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (
10,
13,
):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
@@ -135,7 +138,8 @@ class OpenLibraryMetadataProvider(MetadataProvider):
if isinstance(key, str) and key:
olid = key.split("/")[-1]
items.append({
items.append(
{
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
@@ -151,7 +155,8 @@ class OpenLibraryMetadataProvider(MetadataProvider):
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
})
}
)
return items
@@ -238,7 +243,8 @@ class GoogleBooksMetadataProvider(MetadataProvider):
else:
identifiers.setdefault(ident_type, ident_value)
items.append({
items.append(
{
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
@@ -248,7 +254,8 @@ class GoogleBooksMetadataProvider(MetadataProvider):
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
})
}
)
return items
@@ -346,7 +353,9 @@ class ISBNsearchMetadataProvider(MetadataProvider):
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
@@ -389,7 +398,10 @@ class ISBNsearchMetadataProvider(MetadataProvider):
# Prefer parsed title, but fall back to og:title if needed.
if not title:
m_og = re.search(r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>", html)
m_og = re.search(
r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>",
html,
)
if m_og:
title = self._strip_html_to_text(m_og.group(1))
@@ -534,7 +546,8 @@ class MusicBrainzMetadataProvider(MetadataProvider):
year = str(release_date)[:4] if release_date else ""
mbid = rec.get("id") or ""
items.append({
items.append(
{
"title": title,
"artist": artist,
"album": album,
@@ -542,7 +555,8 @@ class MusicBrainzMetadataProvider(MetadataProvider):
"provider": self.name,
"mbid": mbid,
"raw": rec,
})
}
)
return items
@@ -624,12 +638,7 @@ class YtdlpMetadataProvider(MetadataProvider):
# Provide basic columns for the standard metadata selection table.
# NOTE: This is best-effort; many extractors don't provide artist/album.
artist = (
info.get("artist")
or info.get("uploader")
or info.get("channel")
or ""
)
artist = info.get("artist") or info.get("uploader") or info.get("channel") or ""
album = info.get("album") or info.get("playlist_title") or ""
title = info.get("title") or ""
@@ -734,7 +743,9 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
return availability
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
def get_metadata_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
+62 -20
View File
@@ -154,7 +154,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
return False, "api-error"
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
first = ia_candidates[0].strip()
@@ -420,18 +422,24 @@ class OpenLibrary(Provider):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
@@ -452,10 +460,14 @@ class OpenLibrary(Provider):
try:
token_json = token_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}"
)
if not token_json.get("success"):
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}"
)
token = (token_json.get("value") or {}).get("token")
if not token:
@@ -474,7 +486,9 @@ class OpenLibrary(Provider):
try:
login_json = login_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
raise RuntimeError(
f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}"
)
if login_json.get("success") is False:
if login_json.get("value") == "bad_login":
@@ -484,16 +498,23 @@ class OpenLibrary(Provider):
return session
@classmethod
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if err == "This book is not available to borrow at this time. Please try again later.":
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -574,7 +595,9 @@ class OpenLibrary(Provider):
return False, "archive-metadata-error"
@staticmethod
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text
@@ -748,7 +771,9 @@ class OpenLibrary(Provider):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
@@ -1040,7 +1065,9 @@ class OpenLibrary(Provider):
session_local = requests.Session()
try:
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1062,7 +1089,9 @@ class OpenLibrary(Provider):
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
@@ -1080,7 +1109,6 @@ class OpenLibrary(Provider):
availability_rows[i] = ("unknown", "", "", "")
done += 1
for idx, doc in enumerate(docs):
if not isinstance(doc, dict):
continue
@@ -1150,8 +1178,12 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
)
),
detail=(
@@ -1257,7 +1289,11 @@ class OpenLibrary(Provider):
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
),
@@ -1315,7 +1351,10 @@ class OpenLibrary(Provider):
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
urls = [
f"https://archive.org/borrow/{archive_id}",
f"https://archive.org/details/{archive_id}",
]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
@@ -1358,7 +1397,10 @@ class OpenLibrary(Provider):
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
log(
"[openlibrary] PDF conversion failed; keeping images folder",
file=sys.stderr,
)
return Path(temp_dir)
try:
+40 -11
View File
@@ -294,7 +294,9 @@ class Soulseek(Provider):
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
return None
async def perform_search(self, query: str, timeout: float = 9.0, limit: int = 50) -> List[Dict[str, Any]]:
async def perform_search(
self, query: str, timeout: float = 9.0, limit: int = 50
) -> List[Dict[str, Any]]:
"""Perform async Soulseek search."""
from aioslsk.client import SoulSeekClient
@@ -302,7 +304,9 @@ class Soulseek(Provider):
os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD))
settings = Settings(
credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)
)
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
@@ -411,7 +415,11 @@ class Soulseek(Provider):
display_name = filename.replace("\\", "/").split("/")[-1]
path_parts = filename.replace("\\", "/").split("/")
artist = path_parts[-3] if len(path_parts) >= 3 else ""
album = path_parts[-2] if len(path_parts) >= 3 else (path_parts[-2] if len(path_parts) == 2 else "")
album = (
path_parts[-2]
if len(path_parts) >= 3
else (path_parts[-2] if len(path_parts) == 2 else "")
)
base_name = display_name.rsplit(".", 1)[0] if "." in display_name else display_name
track_num = ""
@@ -504,6 +512,7 @@ class Soulseek(Provider):
def validate(self) -> bool:
try:
from aioslsk.client import SoulSeekClient # noqa: F401
# Require configured credentials.
try:
from config import get_soulseek_username, get_soulseek_password
@@ -541,7 +550,7 @@ async def download_soulseek_file(
local_filename = filename.replace("\\", "/").split("/")[-1]
output_user_dir = output_dir / username
output_user_dir.mkdir(parents=True, exist_ok=True)
output_path = (output_user_dir / local_filename)
output_path = output_user_dir / local_filename
if output_path.exists():
base = output_path.stem
@@ -556,9 +565,13 @@ async def download_soulseek_file(
login_user = (client_username or Soulseek.USERNAME or "").strip()
login_pass = (client_password or Soulseek.PASSWORD or "").strip()
if not login_user or not login_pass:
raise RuntimeError("Soulseek credentials not configured (set provider=soulseek username/password)")
raise RuntimeError(
"Soulseek credentials not configured (set provider=soulseek username/password)"
)
settings = Settings(credentials=CredentialsSettings(username=login_user, password=login_pass))
settings = Settings(
credentials=CredentialsSettings(username=login_user, password=login_pass)
)
async def _attempt_once(attempt_num: int) -> tuple[Optional[Path], Any, int, float]:
client = SoulSeekClient(settings)
@@ -575,7 +588,9 @@ async def download_soulseek_file(
)
debug(f"[soulseek] Requesting download from {username}: {filename}")
transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD))
transfer = await client.transfers.add(
Transfer(username, filename, TransferDirection.DOWNLOAD)
)
transfer.local_path = str(output_path)
await client.transfers.queue(transfer)
@@ -586,7 +601,9 @@ async def download_soulseek_file(
while not transfer.is_finalized():
elapsed = time.time() - start_time
if elapsed > timeout:
log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr)
log(
f"[soulseek] Download timeout after {timeout}s", file=sys.stderr
)
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
state_val = getattr(getattr(transfer, "state", None), "VALUE", None)
progress_bar.finish()
@@ -625,13 +642,23 @@ async def download_soulseek_file(
f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
file=sys.stderr,
)
return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed
return (
downloaded_path,
final_state,
int(downloaded_path.stat().st_size),
final_elapsed,
)
except Exception:
pass
if final_state == TransferState.COMPLETE and downloaded_path.exists():
debug(f"[soulseek] Download complete: {downloaded_path}")
return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed
return (
downloaded_path,
final_state,
int(downloaded_path.stat().st_size),
final_elapsed,
)
fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0)
fail_total = int(getattr(transfer, "filesize", 0) or 0)
@@ -668,7 +695,9 @@ async def download_soulseek_file(
should_retry = (bytes_done == 0) and (elapsed < 15.0)
if attempt < max_attempts and should_retry:
log(f"[soulseek] Retrying after fast failure (state={final_state})", file=sys.stderr)
log(
f"[soulseek] Retrying after fast failure (state={final_state})", file=sys.stderr
)
await asyncio.sleep(2)
continue
break
+34 -10
View File
@@ -146,7 +146,11 @@ class Telegram(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
telegram_conf = self.config.get("provider", {}).get("telegram", {}) if isinstance(self.config, dict) else {}
telegram_conf = (
self.config.get("provider", {}).get("telegram", {})
if isinstance(self.config, dict)
else {}
)
self._app_id = telegram_conf.get("app_id")
self._api_hash = telegram_conf.get("api_hash")
self._bot_token = telegram_conf.get("bot_token")
@@ -365,7 +369,9 @@ class Telegram(Provider):
return False
self._bot_token = bot_token
else:
sys.stderr.write("[telegram] Phone login selected (Telethon will prompt for phone + code).\n")
sys.stderr.write(
"[telegram] Phone login selected (Telethon will prompt for phone + code).\n"
)
sys.stderr.flush()
session_base = self._session_base_path()
@@ -405,11 +411,13 @@ class Telegram(Provider):
if loop_running:
err: list[str] = []
def _worker() -> None:
try:
_run_in_new_loop()
except Exception as exc:
err.append(str(exc))
th = threading.Thread(target=_worker, daemon=True)
th.start()
th.join()
@@ -556,7 +564,9 @@ class Telegram(Provider):
return True
bot_token = str(self._bot_token or "").strip()
if bot_token:
return bool(self._ensure_session_with_bot_token(bot_token) and self._session_is_authorized())
return bool(
self._ensure_session_with_bot_token(bot_token) and self._session_is_authorized()
)
if prompt:
return bool(self._ensure_session_interactive() and self._session_is_authorized())
return False
@@ -596,7 +606,7 @@ class Telegram(Provider):
except TypeError:
dialogs = await client.get_dialogs()
for d in (dialogs or []):
for d in dialogs or []:
entity = getattr(d, "entity", None)
title = ""
username = ""
@@ -641,7 +651,9 @@ class Telegram(Provider):
elif isinstance(entity, User):
kind = "user"
else:
kind = type(entity).__name__.lower() if entity is not None else "unknown"
kind = (
type(entity).__name__.lower() if entity is not None else "unknown"
)
except Exception:
kind = "unknown"
@@ -723,7 +735,7 @@ class Telegram(Provider):
# Normalize and validate file paths + titles.
jobs: list[Dict[str, Any]] = []
seen_paths: set[str] = set()
for f in (files or []):
for f in files or []:
try:
path_text = str((f or {}).get("path") or "").strip()
except Exception:
@@ -812,7 +824,11 @@ class Telegram(Provider):
title_raw = str(job.get("title") or "").strip()
fallback = path_obj.stem
base = _sanitize_filename(title_raw) if title_raw else _sanitize_filename(fallback)
base = (
_sanitize_filename(title_raw)
if title_raw
else _sanitize_filename(fallback)
)
ext = path_obj.suffix
send_name = f"{base}{ext}" if ext else base
@@ -884,7 +900,9 @@ class Telegram(Provider):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
def _download_message_media_sync(self, *, url: str, output_dir: Path) -> Tuple[Path, Dict[str, Any]]:
def _download_message_media_sync(
self, *, url: str, output_dir: Path
) -> Tuple[Path, Dict[str, Any]]:
# Ensure we have an authorized session before attempting API calls.
# Never prompt during downloads.
if not self.ensure_session(prompt=False):
@@ -989,14 +1007,18 @@ class Telegram(Provider):
pass
from models import ProgressBar
progress_bar = ProgressBar()
last_print = {"t": 0.0}
def _progress(current: int, total: int) -> None:
now = time.monotonic()
if now - float(last_print.get("t", 0.0)) < 0.25 and current < total:
return
last_print["t"] = now
progress_bar.update(downloaded=int(current), total=int(total), label="telegram", file=sys.stderr)
progress_bar.update(
downloaded=int(current), total=int(total), label="telegram", file=sys.stderr
)
part_kb = self._resolve_part_size_kb(file_size)
try:
@@ -1007,7 +1029,9 @@ class Telegram(Provider):
part_size_kb=part_kb,
)
except TypeError:
downloaded = await client.download_media(message, file=str(output_dir), progress_callback=_progress)
downloaded = await client.download_media(
message, file=str(output_dir), progress_callback=_progress
)
progress_bar.finish()
if not downloaded:
raise Exception("Telegram download returned no file")
+5 -1
View File
@@ -20,6 +20,7 @@ class YouTube(Provider):
# Use the yt_dlp Python module (installed via requirements.txt).
try:
import yt_dlp # type: ignore
ydl_opts: Dict[str, Any] = {"quiet": True, "skip_download": True, "extract_flat": True}
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
search_query = f"ytsearch{limit}:{query}"
@@ -34,7 +35,9 @@ class YouTube(Provider):
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
duration_str = (
f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
)
views_str = f"{view_count:,}" if view_count else ""
results.append(
@@ -67,6 +70,7 @@ class YouTube(Provider):
def validate(self) -> bool:
    """Return True when the ``yt_dlp`` module can be imported, otherwise False."""
    try:
        # Imported purely to probe availability; the module object is not used here.
        import yt_dlp  # type: ignore  # noqa: F401
    except Exception:
        # Any failure while importing is reported as "not available".
        return False
    return True
+3 -1
View File
@@ -36,7 +36,9 @@ class ZeroXZero(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, uploaded_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
except Exception:
pass
+3 -1
View File
@@ -83,7 +83,9 @@ class Provider(ABC):
return True
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Optional hook for handling `@N` selection semantics.
The CLI can delegate selection behavior to a provider/store instead of
+3 -1
View File
@@ -68,7 +68,9 @@ def download_file(
if progress_callback is not None:
progress_callback(downloaded, total, label)
elif bar is not None:
bar.update(downloaded=downloaded, total=total, label=label, file=sys.stderr)
bar.update(
downloaded=downloaded, total=total, label=label, file=sys.stderr
)
except Exception:
pass
+3 -1
View File
@@ -96,7 +96,9 @@ def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
return availability
def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
def get_search_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[SearchProvider]:
"""Get a search-capable provider by name (compat API)."""
provider = get_provider(name, config)
+4 -1
View File
@@ -6,6 +6,7 @@ workers start, progress, or finish. Intended for CLI background workflows.
Filters to show only workers related to the current pipeline session to avoid
cluttering the terminal with workers from previous sessions.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, Optional, Set
@@ -83,7 +84,9 @@ class BackgroundNotifier:
if self.overlay_mode:
if status in ("completed", "finished", "error"):
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
step = str(worker.get("current_step") or worker.get("description") or "").strip()
step = str(
worker.get("current_step") or worker.get("description") or ""
).strip()
signature = f"{status}|{progress_val}|{step}"
if self._last_state.get(worker_id) == signature:
+92 -34
View File
@@ -6,6 +6,7 @@ Lean, focused downloader without event infrastructure overhead.
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via helper.logger.log()
"""
from __future__ import annotations
import glob # noqa: F401
@@ -97,7 +98,9 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
return False
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
def list_formats(
url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
) -> Optional[List[Dict[str, Any]]]:
"""Get list of available formats for a URL using yt-dlp."""
_ensure_yt_dlp_ready()
@@ -125,7 +128,8 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
result_formats = []
for fmt in formats:
result_formats.append({
result_formats.append(
{
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
@@ -137,7 +141,8 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
})
}
)
debug(f"Found {len(result_formats)} available formats")
return result_formats
@@ -147,7 +152,9 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
def _download_with_sections_via_cli(
url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
@@ -167,7 +174,7 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
@@ -205,7 +212,7 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
title_from_first = info_dict.get("title")
if not quiet:
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
@@ -254,9 +261,13 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
raise DownloadError(
f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}"
)
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
raise DownloadError(
f"yt-dlp subprocess error for section {section_idx}: {exc}"
) from exc
return session_id, first_section_info or {}
@@ -299,7 +310,13 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
else: # video
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = [
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
"res:4320",
"res:2880",
"res:2160",
"res:1440",
"res:1080",
"res:720",
"res",
]
# Add clip sections if provided (yt-dlp will download only these sections)
@@ -307,9 +324,9 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
sections = []
for section_range in opts.clip_sections.split(','):
for section_range in opts.clip_sections.split(","):
try:
start_str, end_str = section_range.strip().split('-')
start_str, end_str = section_range.strip().split("-")
start_sec = float(start_str)
end_sec = float(end_str)
@@ -428,17 +445,17 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
# Check if this is a LibGen URL
parsed = urlparse(libgen_url)
if 'libgen' not in parsed.netloc.lower():
if "libgen" not in parsed.netloc.lower():
return None
if '/file.php' not in parsed.path.lower():
if "/file.php" not in parsed.path.lower():
return None
# LibGen redirects to actual mirrors, follow redirects to get final URL
session = requests.Session()
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
session.headers.update(
{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
)
debug(f"Following LibGen redirect chain for: {libgen_url}")
@@ -462,8 +479,12 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
else:
@@ -477,8 +498,12 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
if not href or href.lower().startswith("javascript:"):
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
except Exception:
@@ -569,7 +594,7 @@ def _download_direct_file(
filename = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ('filename', 'download', 'file', 'name'):
for param_name in ("filename", "download", "file", "name"):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
@@ -627,7 +652,12 @@ def _download_direct_file(
with HTTPClient(timeout=10.0) as client:
with client._request_stream("GET", url, follow_redirects=True) as resp:
resp.raise_for_status()
ct = str(resp.headers.get("content-type", "") or "").split(";", 1)[0].strip().lower()
ct = (
str(resp.headers.get("content-type", "") or "")
.split(";", 1)[0]
.strip()
.lower()
)
if ct.startswith("text/html"):
raise DownloadError("URL appears to be an HTML page, not a direct file")
except DownloadError:
@@ -683,7 +713,9 @@ def _download_direct_file(
# Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
if not filename or not str(filename).strip():
raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")
raise DownloadError(
"Could not determine filename for URL (no Content-Disposition and no path filename)"
)
file_path = _unique_path(output_dir / filename)
@@ -719,12 +751,18 @@ def _download_direct_file(
if transfer_started[0]:
return
try:
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
except Exception:
total_val = None
try:
if hasattr(pipeline_progress, "begin_transfer"):
pipeline_progress.begin_transfer(label=str(filename or "download"), total=total_val)
pipeline_progress.begin_transfer(
label=str(filename or "download"), total=total_val
)
transfer_started[0] = True
except Exception:
return
@@ -737,7 +775,11 @@ def _download_direct_file(
try:
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
_maybe_begin_transfer(content_length)
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
pipeline_progress.update_transfer(
label=str(filename or "download"),
completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
@@ -790,14 +832,21 @@ def _download_direct_file(
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
try:
if progress_bar is not None:
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
avg_speed_str = (
progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
+ "/s"
)
else:
avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
except Exception:
@@ -835,9 +884,9 @@ def _download_direct_file(
# Only use filename as a title tag if we couldn't extract any meaningful tags
# This prevents duplicate title: tags when the filename could be mistaken for metadata
if not any(t.startswith('title:') for t in tags):
if not any(t.startswith("title:") for t in tags):
# Re-extract tags with filename as title only if needed
info['title'] = filename
info["title"] = filename
tags = []
if extract_ytdlp_tags:
try:
@@ -866,7 +915,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -884,7 +937,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -902,7 +959,9 @@ def _download_direct_file(
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
def probe_url(
url: str, no_playlist: bool = False, timeout_seconds: int = 15
) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
@@ -991,4 +1050,3 @@ __all__ = [
"DownloadOptions",
"DownloadMediaResult",
]
+6 -1
View File
@@ -8,6 +8,7 @@ It is intentionally lightweight and safe to import early at process
startup so the CLI can detect and surface environment problems before
trying to import cmdlets or other modules.
"""
from __future__ import annotations
import importlib
@@ -99,7 +100,11 @@ def check_urllib3_compat() -> Tuple[bool, str]:
return False, "\n".join(lines)
# Looks good
debug("urllib3 appears usable: version=%s, exceptions=%s", getattr(urllib3, "__version__", "<unknown>"), hasattr(urllib3, "exceptions"))
debug(
"urllib3 appears usable: version=%s, exceptions=%s",
getattr(urllib3, "__version__", "<unknown>"),
hasattr(urllib3, "exceptions"),
)
return True, "OK"
+9 -7
View File
@@ -27,7 +27,7 @@ class FileServerHandler(SimpleHTTPRequestHandler):
file_path = urllib.parse.unquote(parsed_path.path)
# Remove leading slash
if file_path.startswith('/'):
if file_path.startswith("/"):
file_path = file_path[1:]
# Decode the file path (it's URL encoded)
@@ -44,16 +44,18 @@ class FileServerHandler(SimpleHTTPRequestHandler):
# Determine content type
content_type, _ = mimetypes.guess_type(str(full_path))
if content_type is None:
content_type = 'application/octet-stream'
content_type = "application/octet-stream"
try:
with open(full_path, 'rb') as f:
with open(full_path, "rb") as f:
file_content = f.read()
self.send_response(200)
self.send_header('Content-type', content_type)
self.send_header('Content-Length', str(len(file_content)))
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
self.send_header("Content-type", content_type)
self.send_header("Content-Length", str(len(file_content)))
self.send_header(
"Content-Disposition", f'attachment; filename="{full_path.name}"'
)
self.end_headers()
self.wfile.write(file_content)
logger.info(f"Successfully served file: {full_path.name}")
@@ -112,7 +114,7 @@ def start_file_server(port: int = 8001) -> Optional[str]:
_server_port = port
# Create server
server_address = ('', port)
server_address = ("", port)
_file_server = HTTPServer(server_address, FileServerHandler)
# Start in daemon thread
+15 -9
View File
@@ -10,23 +10,28 @@ from rich_display import console_for
# Module-wide debug switch, toggled via set_debug() and read via is_debug_enabled().
_DEBUG_ENABLED = False
# Per-thread storage for an optional output stream override.
_thread_local = threading.local()


def set_thread_stream(stream) -> None:
    """Set a custom output stream for the current thread."""
    _thread_local.stream = stream


def get_thread_stream():
    """Get the custom output stream for the current thread, if any.

    Returns ``None`` when the calling thread has never set a stream.
    """
    return getattr(_thread_local, "stream", None)


def set_debug(enabled: bool) -> None:
    """Enable or disable debug logging."""
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled


def is_debug_enabled() -> bool:
    """Check if debug logging is enabled."""
    return _DEBUG_ENABLED
def debug(*args, **kwargs) -> None:
"""Print debug message if debug logging is enabled.
@@ -38,8 +43,8 @@ def debug(*args, **kwargs) -> None:
# Check if stderr has been redirected to /dev/null (quiet mode)
# If so, skip output to avoid queuing in background worker's capture
try:
stderr_name = getattr(sys.stderr, 'name', '')
if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
stderr_name = getattr(sys.stderr, "name", "")
if "nul" in str(stderr_name).lower() or "/dev/null" in str(stderr_name):
return
except Exception:
pass
@@ -47,10 +52,10 @@ def debug(*args, **kwargs) -> None:
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
kwargs["file"] = stream
# Set default to stderr for debug messages
elif 'file' not in kwargs:
kwargs['file'] = sys.stderr
elif "file" not in kwargs:
kwargs["file"] = sys.stderr
# Prepend DEBUG label
args = ("DEBUG:", *args)
@@ -150,6 +155,7 @@ def debug_inspect(
value=value,
)
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
@@ -189,10 +195,10 @@ def log(*args, **kwargs) -> None:
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
kwargs["file"] = stream
# Set default to stdout if not specified
elif 'file' not in kwargs:
kwargs['file'] = sys.stdout
elif "file" not in kwargs:
kwargs["file"] = sys.stdout
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")
+18 -6
View File
@@ -33,7 +33,9 @@ class PipelineProgress:
pipe_idx: int = 0
try:
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
stage_ctx = (
self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
)
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
@@ -108,7 +110,9 @@ class PipelineProgress:
except Exception:
return
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
def update_transfer(
self, *, label: str, completed: Optional[int], total: Optional[int] = None
) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
@@ -144,11 +148,15 @@ class PipelineProgress:
except Exception:
return
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
def ensure_local_ui(
self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None
) -> bool:
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
try:
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
existing = (
self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
)
except Exception:
existing = None
@@ -170,7 +178,9 @@ class PipelineProgress:
self._local_attached = False
try:
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
ui.begin_pipe(
0, total_items=max(1, int(total_items)), items_preview=list(items_preview or [])
)
except Exception:
pass
@@ -210,7 +220,9 @@ class PipelineProgress:
total_items: int,
items_preview: Optional[Sequence[Any]] = None,
) -> Iterator["PipelineProgress"]:
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
created = self.ensure_local_ui(
label=label, total_items=total_items, items_preview=items_preview
)
try:
yield self
finally:
+9 -2
View File
@@ -14,8 +14,15 @@ from models import ProgressBar
_BAR = ProgressBar()
def print_progress(
    filename: str, current: int, total: int, speed: float = 0, end: str = "\r"
) -> None:
    """Report progress for *filename* through the module-level progress bar.

    Args:
        filename: Label shown on the bar; a falsy value is shown as "progress".
        current: Amount completed so far, coerced with ``int()``.
        total: Expected total; a falsy value is forwarded to the bar as ``None``.
        speed: Accepted but not used by this function.
        end: Accepted but not used by this function.
    """
    _BAR.update(
        downloaded=int(current),
        total=int(total) if total else None,
        label=str(filename or "progress"),
        file=sys.stderr,
    )
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
+83 -47
View File
@@ -1,4 +1,5 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
@@ -11,16 +12,18 @@ from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == 'nt':
if os.name == "nt":
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, 'r+b', buffering=0)
return open(path, "r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -38,7 +41,7 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
while True:
try:
sock.connect(path)
return sock.makefile('r+b', buffering=0)
return sock.makefile("r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -49,74 +52,92 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
    """Create a helper function for sending script messages via IPC.

    Args:
        ipc: Writable binary IPC handle, or ``None`` when there is no connection.

    Returns:
        A callable ``send(event, payload)`` that returns ``None``. When *ipc* is
        ``None`` the callable does nothing. Otherwise each call writes one
        newline-terminated UTF-8 JSON object of the form
        ``{"command": ["script-message", event, <payload encoded as JSON text>]}``
        and flushes the handle. Write failures are swallowed, so sending is
        best-effort.
    """
    if ipc is None:

        def _noop(_event: str, _payload: dict) -> None:
            return None

        return _noop

    # Serialises write+flush so calls from several threads cannot interleave bytes.
    lock = threading.Lock()

    def _send(event: str, payload: dict) -> None:
        message = json.dumps(
            {"command": ["script-message", event, json.dumps(payload)]}, ensure_ascii=False
        )
        encoded = message.encode("utf-8") + b"\n"
        with lock:
            try:
                ipc.write(encoded)
                ipc.flush()
            except (OSError, ValueError):
                # OSError: the underlying pipe/socket failed.
                # ValueError: Python file objects raise this once they are closed.
                pass

    return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
    """Yield each item of *stream* with trailing ``\\r`` and ``\\n`` characters removed."""
    for raw in stream:
        yield raw.rstrip("\r\n")
def _run_task(args, parser) -> int:
if not args.command:
parser.error('run-task requires a command to execute (use "--" before the command).')
env = os.environ.copy()
for entry in args.env:
key, sep, value = entry.partition('=')
key, sep, value = entry.partition("=")
if not sep:
parser.error(f'Invalid environment variable definition: {entry!r}')
parser.error(f"Invalid environment variable definition: {entry!r}")
env[key] = value
command = list(args.command)
if command and command[0] == '--':
if command and command[0] == "--":
command.pop(0)
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
if not command:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': 'No command provided after separator',
})
log('[downlow.py] No command provided for run-task', file=sys.stderr)
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": "No command provided after separator",
},
)
log("[downlow.py] No command provided for run-task", file=sys.stderr)
return 1
if command and isinstance(command[0], str) and sys.executable:
first = command[0].lower()
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
if first in {"python", "python3", "py", "python.exe", "python3.exe", "py.exe"}:
command[0] = sys.executable
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"Launching command: {command}", file=sys.stderr)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'start',
'command': command,
'cwd': args.cwd or os.getcwd(),
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "start",
"command": command,
"cwd": args.cwd or os.getcwd(),
},
)
popen_kwargs = {}
if os.name == 'nt':
if os.name == "nt":
# Avoid flashing a console window when spawning console-subsystem executables.
flags = 0
try:
flags |= int(getattr(subprocess, 'CREATE_NO_WINDOW', 0x08000000))
flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000))
except Exception:
flags |= 0x08000000
popen_kwargs['creationflags'] = flags
popen_kwargs["creationflags"] = flags
try:
si = subprocess.STARTUPINFO()
si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
si.wShowWindow = subprocess.SW_HIDE
popen_kwargs['startupinfo'] = si
popen_kwargs["startupinfo"] = si
except Exception:
pass
try:
@@ -132,44 +153,59 @@ def _run_task(args, parser) -> int:
**popen_kwargs,
)
except FileNotFoundError as exc:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': f'Executable not found: {exc.filename}',
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": f"Executable not found: {exc.filename}",
},
)
log(f"{exc}", file=sys.stderr)
return 1
stdout_lines: list[str] = []
stderr_lines: list[str] = []
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
for line in iter_stream(stream):
sink.append(line)
notifier('downlow-task-event', {
'id': args.task_id,
'event': label,
'line': line,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": label,
"line": line,
},
)
threads = []
if process.stdout:
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
t_out = threading.Thread(
target=pump, args=(process.stdout, "stdout", stdout_lines), daemon=True
)
t_out.start()
threads.append(t_out)
if process.stderr:
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
t_err = threading.Thread(
target=pump, args=(process.stderr, "stderr", stderr_lines), daemon=True
)
t_err.start()
threads.append(t_err)
return_code = process.wait()
for t in threads:
t.join(timeout=0.1)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'exit',
'returncode': return_code,
'success': return_code == 0,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "exit",
"returncode": return_code,
"success": return_code == 0,
},
)
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
if stdout_lines:
log('\n'.join(stdout_lines))
log("\n".join(stdout_lines))
if stderr_lines:
log('\n'.join(stderr_lines), file=sys.stderr)
log("\n".join(stderr_lines), file=sys.stderr)
return return_code
+62 -33
View File
@@ -1,10 +1,12 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations
import json
import hashlib
import subprocess
import shutil
try:
import ffmpeg # type: ignore
except Exception:
@@ -28,12 +30,16 @@ except ImportError:
CHUNK_SIZE = 1024 * 1024 # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
    """Ensure *path* exists as a directory, creating missing parents.

    Raises:
        RuntimeError: If the directory cannot be created; the original
            ``OSError`` is attached as the cause.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError as exc:  # pragma: no cover - surfaced to caller
        raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
def unique_path(path: Path) -> Path:
"""Return a unique path by appending " (n)" if needed."""
if not path.exists():
@@ -48,15 +54,18 @@ def unique_path(path: Path) -> Path:
return candidate
counter += 1
def sanitize_metadata_value(value: Any) -> str | None:
    """Normalise *value* into a single-line string, or ``None`` if nothing is left.

    Non-string values are converted with ``str()``. Every NUL, carriage return
    and line feed is replaced by one space, then surrounding whitespace is
    stripped. ``None`` input and values that end up empty both yield ``None``.
    """
    if value is None:
        return None
    text = value if isinstance(value, str) else str(value)
    # One pass over the string: code points 0 (NUL), 10 (LF) and 13 (CR) -> space.
    cleaned = text.translate({0: " ", 10: " ", 13: " "}).strip()
    return cleaned or None
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
@@ -65,11 +74,13 @@ def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen.add(value)
ordered.append(value)
return ordered
def sha256_file(file_path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*."""
    hasher = hashlib.sha256()
    with file_path.open("rb") as handle:
        # Feed the hash CHUNK_SIZE bytes at a time; read() returns b"" at end of file.
        for chunk in iter(lambda: handle.read(CHUNK_SIZE), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
@@ -90,31 +101,31 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
file_ext = file_path.suffix.lower()
# Ensure metadata has a title field that matches the filename (without extension)
# This allows the sidecar to be matched and imported properly during batch import
if 'title' not in metadata or not metadata.get('title'):
metadata['title'] = file_name
metadata['hash'] = sha256_file(file_path)
metadata['size'] = Path(file_path).stat().st_size
if "title" not in metadata or not metadata.get("title"):
metadata["title"] = file_name
metadata["hash"] = sha256_file(file_path)
metadata["size"] = Path(file_path).stat().st_size
format_found = False
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
for key, info in ext_map.items():
if info.get("ext") == file_ext:
metadata['type'] = mime_type
metadata["type"] = mime_type
format_found = True
break
if format_found:
break
else:
metadata['type'] = 'unknown'
metadata["type"] = "unknown"
metadata.update(ffprobe(str(file_path)))
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
metadata_path = file_path.with_suffix(file_path.suffix + ".metadata")
try:
with open(metadata_path, 'w', encoding='utf-8') as f:
with open(metadata_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
except OSError as exc:
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
def create_tags_sidecar(file_path: Path, tags: set) -> None:
"""Create a .tag sidecar file with tags (one per line).
@@ -125,9 +136,9 @@ def create_tags_sidecar(file_path: Path, tags: set) -> None:
if not tags:
return
tags_path = file_path.with_suffix(file_path.suffix + '.tag')
tags_path = file_path.with_suffix(file_path.suffix + ".tag")
try:
with open(tags_path, 'w', encoding='utf-8') as f:
with open(tags_path, "w", encoding="utf-8") as f:
for tag in sorted(tags):
f.write(f"{str(tag).strip().lower()}\n")
except Exception as e:
@@ -157,7 +168,16 @@ def ffprobe(file_path: str) -> dict:
if ffprobe_cmd:
try:
proc = subprocess.run(
[ffprobe_cmd, "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(file_path)],
[
ffprobe_cmd,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
str(file_path),
],
check=True,
capture_output=True,
text=True,
@@ -185,7 +205,9 @@ def ffprobe(file_path: str) -> dict:
if codec_type == "audio":
metadata["audio_codec"] = stream.get("codec_name")
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
metadata["samplerate"] = (
int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
)
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
elif codec_type == "video":
metadata["video_codec"] = stream.get("codec_name")
@@ -263,7 +285,7 @@ def format_duration(seconds) -> str:
Returns:
Formatted string like '1:23:45' or '5:30'
"""
if seconds is None or seconds == '':
if seconds is None or seconds == "":
return "N/A"
if isinstance(seconds, str):
@@ -307,8 +329,8 @@ def format_timestamp(timestamp_str) -> str:
# Handle ISO format timestamps
if isinstance(timestamp_str, str):
# Try parsing ISO format
if 'T' in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
if "T" in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
else:
# Try other common formats
dt = datetime.fromisoformat(timestamp_str)
@@ -331,15 +353,22 @@ def format_metadata_value(key: str, value) -> str:
Returns:
Formatted string for display
"""
if value is None or value == '':
if value is None or value == "":
return "N/A"
# Apply field-specific formatting
if key in ('size', 'file_size'):
if key in ("size", "file_size"):
return format_bytes(value)
elif key in ('duration', 'length'):
elif key in ("duration", "length"):
return format_duration(value)
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
elif key in (
"time_modified",
"time_imported",
"created_at",
"updated_at",
"indexed_at",
"timestamp",
):
return format_timestamp(value)
else:
return str(value)
@@ -365,7 +394,7 @@ def extract_link_from_args(args: Iterable[str]) -> Any | None:
return None
potential_link = str(args_list[0])
if potential_link.startswith(('http://', 'https://')):
if potential_link.startswith(("http://", "https://")):
return potential_link
return None
@@ -381,12 +410,12 @@ def extract_link_from_result(result: Any) -> Any | None:
URL string if found, None otherwise
"""
if isinstance(result, dict):
return result.get('url') or result.get('link') or result.get('href')
return result.get("url") or result.get("link") or result.get("href")
return (
getattr(result, 'url', None) or
getattr(result, 'link', None) or
getattr(result, 'href', None)
getattr(result, "url", None)
or getattr(result, "link", None)
or getattr(result, "href", None)
)
@@ -421,7 +450,7 @@ def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | No
API key if found and not empty, None otherwise
"""
try:
parts = key_path.split('.')
parts = key_path.split(".")
value = config
for part in parts:
if isinstance(value, dict):
@@ -446,11 +475,11 @@ def add_direct_link_to_result(result: Any, direct_link: str, original_link: str)
original_link: The original restricted URL
"""
if isinstance(result, dict):
result['direct_link'] = direct_link
result['original_link'] = original_link
result["direct_link"] = direct_link
result["original_link"] = original_link
else:
setattr(result, 'direct_link', direct_link)
setattr(result, 'original_link', original_link)
setattr(result, "direct_link", direct_link)
setattr(result, "original_link", original_link)
# ============================================================================
+83 -66
View File
@@ -1,82 +1,99 @@
# Registry of known file formats, grouped by broad media category.
#
# Layout: mime_maps[category][format_key] -> descriptor dict with
#   "ext"            canonical file extension including the leading dot
#                    ("" for the generic octet-stream entry),
#   "mimes"          list of MIME type strings associated with the format,
#   "sequence"       (optional) True for animated/multi-frame image formats,
#   "audio_only_ext" (optional) alternate extension for audio-only content,
#   "note"           (optional) free-form remark.
#
# Some MIME strings are listed under more than one format (e.g. "audio/mp4"
# under both mp4 and m4a, "video/x-matroska" under both mkv and mka,
# "application/zip" under both zip and cbz), so a MIME -> format reverse
# lookup over this table is not unique.
mime_maps = {
    "image": {
        "jpg": {"ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"]},
        "png": {"ext": ".png", "mimes": ["image/png"]},
        "gif": {"ext": ".gif", "mimes": ["image/gif"]},
        "webp": {"ext": ".webp", "mimes": ["image/webp"]},
        "avif": {"ext": ".avif", "mimes": ["image/avif"]},
        "jxl": {"ext": ".jxl", "mimes": ["image/jxl"]},
        "bmp": {"ext": ".bmp", "mimes": ["image/bmp"]},
        "heic": {"ext": ".heic", "mimes": ["image/heic"]},
        "heif": {"ext": ".heif", "mimes": ["image/heif"]},
        "ico": {"ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"]},
        "qoi": {"ext": ".qoi", "mimes": ["image/qoi"]},
        "tiff": {"ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"]},
        "svg": {"ext": ".svg", "mimes": ["image/svg+xml"]},
    },
    "image_sequence": {
        "apng": {"ext": ".apng", "mimes": ["image/apng"], "sequence": True},
        "avifs": {"ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True},
        "heics": {"ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True},
        "heifs": {"ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True},
    },
    "video": {
        "mp4": {"ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"]},
        "webm": {"ext": ".webm", "mimes": ["video/webm", "audio/webm"]},
        "mov": {"ext": ".mov", "mimes": ["video/quicktime"]},
        "ogv": {"ext": ".ogv", "mimes": ["video/ogg"]},
        "mpeg": {"ext": ".mpeg", "mimes": ["video/mpeg"]},
        "avi": {"ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"]},
        "flv": {"ext": ".flv", "mimes": ["video/x-flv"]},
        "mkv": {
            "ext": ".mkv",
            "mimes": ["video/x-matroska", "application/x-matroska"],
            "audio_only_ext": ".mka",
        },
        "wmv": {"ext": ".wmv", "mimes": ["video/x-ms-wmv"]},
        "rv": {"ext": ".rv", "mimes": ["video/vnd.rn-realvideo"]},
    },
    "audio": {
        "mp3": {"ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"]},
        "m4a": {"ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"]},
        "ogg": {"ext": ".ogg", "mimes": ["audio/ogg"]},
        "opus": {"ext": ".opus", "mimes": ["audio/opus"]},
        "flac": {"ext": ".flac", "mimes": ["audio/flac"]},
        "wav": {"ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"]},
        "wma": {"ext": ".wma", "mimes": ["audio/x-ms-wma"]},
        "tta": {"ext": ".tta", "mimes": ["audio/x-tta"]},
        "wv": {"ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"]},
        "mka": {"ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"]},
    },
    "document": {
        "pdf": {"ext": ".pdf", "mimes": ["application/pdf"]},
        "epub": {"ext": ".epub", "mimes": ["application/epub+zip"]},
        "djvu": {"ext": ".djvu", "mimes": ["application/vnd.djvu"]},
        "rtf": {"ext": ".rtf", "mimes": ["application/rtf"]},
        "docx": {
            "ext": ".docx",
            "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
        },
        "xlsx": {
            "ext": ".xlsx",
            "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
        },
        "pptx": {
            "ext": ".pptx",
            "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
        },
        "doc": {"ext": ".doc", "mimes": ["application/msword"]},
        "xls": {"ext": ".xls", "mimes": ["application/vnd.ms-excel"]},
        "ppt": {"ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"]},
    },
    "archive": {
        "zip": {"ext": ".zip", "mimes": ["application/zip"]},
        "7z": {"ext": ".7z", "mimes": ["application/x-7z-compressed"]},
        "rar": {"ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"]},
        "gz": {"ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"]},
        "tar": {"ext": ".tar", "mimes": ["application/x-tar"]},
        "cbz": {
            "ext": ".cbz",
            "mimes": ["application/zip"],
            "note": "zip archive of images; prefer extension-based detection for comics",
        },
    },
    "project": {
        "clip": {"ext": ".clip", "mimes": ["application/clip"]},
        "kra": {"ext": ".kra", "mimes": ["application/x-krita"]},
        "procreate": {"ext": ".procreate", "mimes": ["application/x-procreate"]},
        "psd": {"ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"]},
        "swf": {"ext": ".swf", "mimes": ["application/x-shockwave-flash"]},
    },
    "other": {
        "octet-stream": {"ext": "", "mimes": ["application/octet-stream"]},
        "json": {"ext": ".json", "mimes": ["application/json"]},
        "xml": {"ext": ".xml", "mimes": ["application/xml", "text/xml"]},
        "csv": {"ext": ".csv", "mimes": ["text/csv"]},
    },
}
@@ -90,15 +107,15 @@ def get_type_from_ext(ext: str) -> str:
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
"""
if not ext:
return 'other'
return "other"
ext_clean = ext.lstrip('.').lower()
ext_clean = ext.lstrip(".").lower()
for type_name, extensions_dict in mime_maps.items():
if ext_clean in extensions_dict:
return type_name
return 'other'
return "other"
# Canonical supported extension set for all stores/cmdlets.
+77 -40
View File
@@ -20,8 +20,14 @@ logger = logging.getLogger(__name__)
class Worker:
"""Represents a single worker task with state management."""
def __init__(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", manager: Optional['WorkerManager'] = None):
def __init__(
self,
worker_id: str,
worker_type: str,
title: str = "",
description: str = "",
manager: Optional["WorkerManager"] = None,
):
"""Initialize a worker.
Args:
@@ -140,9 +146,13 @@ class Worker:
class WorkerLoggingHandler(logging.StreamHandler):
"""Custom logging handler that captures logs for a worker."""
def __init__(self, worker_id: str, db: API_folder_store,
manager: Optional['WorkerManager'] = None,
buffer_size: int = 50):
def __init__(
self,
worker_id: str,
db: API_folder_store,
manager: Optional["WorkerManager"] = None,
buffer_size: int = 50,
):
"""Initialize the handler.
Args:
@@ -160,8 +170,7 @@ class WorkerLoggingHandler(logging.StreamHandler):
# Set a format that includes timestamp and level
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
"%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
self.setFormatter(formatter)
@@ -186,7 +195,8 @@ class WorkerLoggingHandler(logging.StreamHandler):
# Add timestamp and level if not already in message
import time
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
with self._lock:
@@ -201,12 +211,12 @@ class WorkerLoggingHandler(logging.StreamHandler):
def _flush(self):
"""Flush buffered logs to database."""
if self.buffer:
log_text = '\n'.join(self.buffer)
log_text = "\n".join(self.buffer)
try:
if self.manager:
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
self.manager.append_worker_stdout(self.worker_id, log_text, channel="log")
else:
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
self.db.append_worker_stdout(self.worker_id, log_text, channel="log")
except Exception as e:
# If we can't write to DB, at least log it
log(f"Error flushing worker logs: {e}")
@@ -332,7 +342,9 @@ class WorkerManager:
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
return handler
except Exception as e:
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True
)
return None
def disable_logging_for_worker(self, worker_id: str) -> None:
@@ -356,11 +368,20 @@ class WorkerManager:
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
except Exception as e:
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}",
exc_info=True,
)
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
description: str = "", total_steps: int = 0,
pipe: Optional[str] = None) -> bool:
def track_worker(
self,
worker_id: str,
worker_type: str,
title: str = "",
description: str = "",
total_steps: int = 0,
pipe: Optional[str] = None,
) -> bool:
"""Start tracking a new worker.
Args:
@@ -375,7 +396,9 @@ class WorkerManager:
True if worker was inserted successfully
"""
try:
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
result = self.db.insert_worker(
worker_id, worker_type, title, description, total_steps, pipe=pipe
)
if result > 0:
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
self._start_refresh_if_needed()
@@ -385,8 +408,14 @@ class WorkerManager:
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
return False
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
details: str = "", error: str = "") -> bool:
def update_worker(
self,
worker_id: str,
progress: float = 0.0,
current_step: str = "",
details: str = "",
error: str = "",
) -> bool:
"""Update worker progress and status.
Args:
@@ -402,26 +431,27 @@ class WorkerManager:
try:
kwargs = {}
if progress > 0:
kwargs['progress'] = progress
kwargs["progress"] = progress
if current_step:
kwargs['current_step'] = current_step
kwargs["current_step"] = current_step
if details:
kwargs['description'] = details
kwargs["description"] = details
if error:
kwargs['error_message'] = error
kwargs["error_message"] = error
if kwargs:
kwargs['last_updated'] = datetime.now().isoformat()
if 'current_step' in kwargs and kwargs['current_step']:
self._worker_last_step[worker_id] = str(kwargs['current_step'])
kwargs["last_updated"] = datetime.now().isoformat()
if "current_step" in kwargs and kwargs["current_step"]:
self._worker_last_step[worker_id] = str(kwargs["current_step"])
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
return False
def finish_worker(self, worker_id: str, result: str = "completed",
error_msg: str = "", result_data: str = "") -> bool:
def finish_worker(
self, worker_id: str, result: str = "completed", error_msg: str = "", result_data: str = ""
) -> bool:
"""Mark a worker as finished.
Args:
@@ -434,14 +464,11 @@ class WorkerManager:
True if update was successful
"""
try:
kwargs = {
'status': result,
'completed_at': datetime.now().isoformat()
}
kwargs = {"status": result, "completed_at": datetime.now().isoformat()}
if error_msg:
kwargs['error_message'] = error_msg
kwargs["error_message"] = error_msg
if result_data:
kwargs['result_data'] = result_data
kwargs["result_data"] = result_data
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
@@ -475,7 +502,9 @@ class WorkerManager:
try:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
finished = [
w for w in all_workers if w.get("status") in ["completed", "error", "cancelled"]
]
return finished
except Exception as e:
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
@@ -516,7 +545,9 @@ class WorkerManager:
self._worker_last_step[worker_id] = step_text
return success
except Exception as e:
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True
)
return False
def _get_last_step(self, worker_id: str) -> Optional[str]:
@@ -535,8 +566,10 @@ class WorkerManager:
try:
return self.db.get_worker_steps(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
return ''
logger.error(
f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True
)
return ""
def start_auto_refresh(self) -> None:
"""Start the auto-refresh thread for periodic worker updates."""
@@ -548,7 +581,9 @@ class WorkerManager:
logger.debug("[WorkerManager] Auto-refresh already running")
return
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
logger.info(
f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval"
)
self._stop_refresh = False
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
@@ -587,7 +622,9 @@ class WorkerManager:
try:
callback(active)
except Exception as e:
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error in refresh callback: {e}", exc_info=True
)
except Exception as e:
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
+245 -111
View File
@@ -15,10 +15,10 @@ from Store._base import Store
def _normalize_hash(value: Any) -> Optional[str]:
candidate = str(value or '').strip().lower()
candidate = str(value or "").strip().lower()
if len(candidate) != 64:
return None
if any(ch not in '0123456789abcdef' for ch in candidate):
if any(ch not in "0123456789abcdef" for ch in candidate):
return None
return candidate
@@ -30,10 +30,9 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
return _normalize_hash(file_path.stem)
class Folder(Store):
""""""
# Track which locations have already been migrated to avoid repeated migrations
_migrated_locations = set()
# Cache scan results to avoid repeated full scans across repeated instantiations
@@ -70,6 +69,7 @@ class Folder(Store):
from API.folder import API_folder_store
from API.folder import LocalLibraryInitializer
from pathlib import Path
location_path = Path(self._location).expanduser()
# Use context manager to ensure connection is properly closed
@@ -87,13 +87,15 @@ class Folder(Store):
try:
initializer = LocalLibraryInitializer(location_path)
stats = initializer.scan_and_index() or {}
files_new = int(stats.get('files_new', 0) or 0)
sidecars = int(stats.get('sidecars_imported', 0) or 0)
total_db = int(stats.get('files_total_db', 0) or 0)
files_new = int(stats.get("files_new", 0) or 0)
sidecars = int(stats.get("sidecars_imported", 0) or 0)
total_db = int(stats.get("files_total_db", 0) or 0)
if files_new > 0 or sidecars > 0:
detail = f"New: {files_new}, Sidecars: {sidecars}" + (f" (Total: {total_db})" if total_db else "")
detail = f"New: {files_new}, Sidecars: {sidecars}" + (
f" (Total: {total_db})" if total_db else ""
)
else:
detail = ("Up to date" + (f" (Total: {total_db})" if total_db else ""))
detail = "Up to date" + (f" (Total: {total_db})" if total_db else "")
Folder._scan_cache[location_key] = (True, detail, dict(stats))
except Exception as exc:
Folder._scan_cache[location_key] = (False, f"Scan failed: {exc}", {})
@@ -112,6 +114,7 @@ class Folder(Store):
return
from pathlib import Path
location_path = Path(location).expanduser()
location_str = str(location_path)
@@ -143,14 +146,16 @@ class Folder(Store):
continue
# Skip database files and sidecars
if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'):
if file_path.suffix in (".db", ".metadata", ".tag", "-shm", "-wal"):
continue
# Also skip if the file ends with -shm or -wal (SQLite journal files)
if file_path.name.endswith(('-shm', '-wal')):
if file_path.name.endswith(("-shm", "-wal")):
continue
# Check if filename is already a hash (without extension)
if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()):
if len(file_path.stem) == 64 and all(
c in "0123456789abcdef" for c in file_path.stem.lower()
):
continue # Already migrated, will process in second pass
try:
@@ -173,14 +178,22 @@ class Folder(Store):
if tags:
tags_to_add = list(tags)
# Check if title tag exists
has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add)
has_title_tag = any(
t.lower().startswith("title:") for t in tags_to_add
)
if url:
url_to_add = list(url)
debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr)
debug(
f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url",
file=sys.stderr,
)
# Delete the sidecar after importing
sidecar_path.unlink()
except Exception as exc:
debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr)
debug(
f"Failed to read sidecar for {file_path.name}: {exc}",
file=sys.stderr,
)
# Ensure there's a title tag (use original filename if not present)
if not has_title_tag:
@@ -188,7 +201,9 @@ class Folder(Store):
# Rename file to hash if needed
if hash_path != file_path and not hash_path.exists():
debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr)
debug(
f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr
)
file_path.rename(hash_path)
# Ensure DB points to the renamed path (update by hash).
@@ -204,35 +219,45 @@ class Folder(Store):
db.get_or_create_file_entry(hash_path)
# Save extension metadata
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(hash_path, {
'hash': file_hash,
'ext': ext_clean,
'size': hash_path.stat().st_size
})
ext_clean = file_ext.lstrip(".") if file_ext else ""
db.save_metadata(
hash_path,
{
"hash": file_hash,
"ext": ext_clean,
"size": hash_path.stat().st_size,
},
)
# Add all tags (including title tag)
if tags_to_add:
db.save_tags(hash_path, tags_to_add)
debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr)
debug(
f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr
)
# Note: url would need a separate table if you want to store them
# For now, we're just noting them in debug
if url_to_add:
debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr)
debug(
f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}",
file=sys.stderr,
)
except Exception as exc:
debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr)
# Second pass: ensure all files in database have a title: tag
db.connection.commit()
cursor.execute('''
cursor.execute(
"""
SELECT f.hash, f.file_path
FROM files f
WHERE NOT EXISTS (
SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%'
)
''')
"""
)
files_without_title = cursor.fetchall()
for file_hash, file_path_str in files_without_title:
@@ -244,13 +269,16 @@ class Folder(Store):
db.save_tags(file_path, [title_tag])
debug(f"Added title tag to {file_path.name}", file=sys.stderr)
except Exception as exc:
debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr)
debug(
f"Failed to add title tag to file {file_path_str}: {exc}",
file=sys.stderr,
)
db.connection.commit()
# Third pass: discover files on disk that aren't in the database yet
# These are hash-named files that were added after initial indexing
cursor.execute('SELECT LOWER(hash) FROM files')
cursor.execute("SELECT LOWER(hash) FROM files")
db_hashes = {row[0] for row in cursor.fetchall()}
discovered = 0
@@ -258,7 +286,9 @@ class Folder(Store):
if file_path.is_file():
# Check if file name (without extension) is a 64-char hex hash
name_without_ext = file_path.stem
if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()):
if len(name_without_ext) == 64 and all(
c in "0123456789abcdef" for c in name_without_ext.lower()
):
file_hash = name_without_ext.lower()
# Skip if already in DB
@@ -271,24 +301,32 @@ class Folder(Store):
# Save extension metadata
file_ext = file_path.suffix
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(file_path, {
'hash': file_hash,
'ext': ext_clean,
'size': file_path.stat().st_size
})
ext_clean = file_ext.lstrip(".") if file_ext else ""
db.save_metadata(
file_path,
{
"hash": file_hash,
"ext": ext_clean,
"size": file_path.stat().st_size,
},
)
discovered += 1
except Exception as e:
debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr)
debug(
f"Failed to discover file {file_path.name}: {e}",
file=sys.stderr,
)
if discovered > 0:
debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr)
debug(
f"Discovered and indexed {discovered} undiscovered files in {location_path.name}",
file=sys.stderr,
)
db.connection.commit()
except Exception as exc:
debug(f"Migration to hash storage failed: {exc}", file=sys.stderr)
def location(self) -> str:
    """Return the location string stored on this instance (``self._location``)."""
    return self._location
@@ -340,6 +378,7 @@ class Folder(Store):
# Check if file already exists
from API.folder import API_folder_store
with API_folder_store(Path(self._location)) as db:
existing_path = db.search_hash(file_hash)
if existing_path and existing_path.exists():
@@ -406,6 +445,7 @@ class Folder(Store):
duration_value: float | None = None
try:
from SYS.utils import ffprobe
probe = ffprobe(str(save_file))
duration = probe.get("duration")
if isinstance(duration, (int, float)) and duration > 0:
@@ -417,13 +457,16 @@ class Folder(Store):
with API_folder_store(Path(self._location)) as db:
db.get_or_create_file_entry(save_file)
# Save metadata including extension
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(save_file, {
'hash': file_hash,
'ext': ext_clean,
'size': save_file.stat().st_size,
'duration': duration_value,
})
ext_clean = file_ext.lstrip(".") if file_ext else ""
db.save_metadata(
save_file,
{
"hash": file_hash,
"ext": ext_clean,
"size": save_file.stat().st_size,
"duration": duration_value,
},
)
# Add tags if provided
if tag_list:
@@ -482,7 +525,7 @@ class Folder(Store):
return s.strip().lower()
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = str(value or "").strip().lower().lstrip(".")
v = "".join(ch for ch in v if ch.isalnum())
return v
@@ -503,7 +546,7 @@ class Folder(Store):
if sys_ext:
ext_filter = sys_ext
query_lower = re.sub(r"\s*\bsystem:filetype\s*(?:=\s*)?[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
query = query_lower
m = re.search(r"\bext:([^\s,]+)", query_lower)
@@ -512,7 +555,7 @@ class Folder(Store):
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
query = query_lower
except Exception:
ext_filter = None
@@ -538,9 +581,9 @@ class Folder(Store):
def _like_pattern(term: str) -> str:
# Convert glob-like tokens to SQL LIKE wildcards.
return str(term or "").replace('*', '%').replace('?', '_')
return str(term or "").replace("*", "%").replace("?", "_")
tokens = [t.strip() for t in query.split(',') if t.strip()]
tokens = [t.strip() for t in query.split(",") if t.strip()]
if not match_all and len(tokens) == 1 and _normalize_hash(query):
debug("Hash queries require 'hash:' prefix for local search")
@@ -550,21 +593,23 @@ class Folder(Store):
debug("Hash queries require 'hash:' prefix for local search")
return results
def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
def _create_entry(
file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]
) -> dict[str, Any]:
path_str = str(file_path)
# Get title from tags if available, otherwise use hash as fallback
title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
title = next((t.split(":", 1)[1] for t in tags if t.lower().startswith("title:")), None)
if not title:
# Fallback to hash if no title tag exists
hash_value = _resolve_file_hash(db_hash, file_path)
title = hash_value if hash_value else file_path.stem
# Extract extension from file path
ext = file_path.suffix.lstrip('.')
ext = file_path.suffix.lstrip(".")
if not ext:
# Fallback: try to extract from title (original filename might be in title)
title_path = Path(title)
ext = title_path.suffix.lstrip('.')
ext = title_path.suffix.lstrip(".")
# Build clean entry with only necessary fields
hash_value = _resolve_file_hash(db_hash, file_path)
@@ -610,7 +655,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -618,7 +663,9 @@ class Folder(Store):
results.append(entry)
if limit is not None and len(results) >= limit:
return results
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
backend_label = str(
getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder"
)
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
@@ -630,41 +677,49 @@ class Folder(Store):
if not token:
return set()
if ':' in token and not token.startswith(':'):
namespace, pattern = token.split(':', 1)
if ":" in token and not token.startswith(":"):
namespace, pattern = token.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
if namespace == 'hash':
if namespace == "hash":
normalized_hash = _normalize_hash(pattern)
if not normalized_hash:
return set()
h = api.get_file_hash_by_hash(normalized_hash)
return {h} if h else set()
if namespace == 'url':
if not pattern or pattern == '*':
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
if namespace == "url":
if not pattern or pattern == "*":
return api.get_file_hashes_with_any_url(
limit=url_fetch_limit
)
return api.get_file_hashes_by_url_like(
_url_like_pattern(pattern), limit=url_fetch_limit
)
if namespace == 'system':
if namespace == "system":
# Hydrus-compatible query: system:filetype = png
m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
if m_ft:
normalized_ext = _normalize_ext_filter(m_ft.group(1))
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
return api.get_file_hashes_by_ext(
normalized_ext, limit=url_fetch_limit
)
return set()
if namespace in {'ext', 'extension'}:
if namespace in {"ext", "extension"}:
normalized_ext = _normalize_ext_filter(pattern)
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
return api.get_file_hashes_by_ext(
normalized_ext, limit=url_fetch_limit
)
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
if namespace == "store":
if pattern not in {"local", "file", "filesystem"}:
return set()
return api.get_all_file_hashes()
@@ -677,7 +732,9 @@ class Folder(Store):
tag_lower = str(tag_val).lower()
if not tag_lower.startswith(f"{namespace}:"):
continue
value = _normalize_namespace_text(tag_lower[len(namespace) + 1 :], allow_wildcards=False)
value = _normalize_namespace_text(
tag_lower[len(namespace) + 1 :], allow_wildcards=False
)
pat = _normalize_namespace_text(pattern, allow_wildcards=True)
if fnmatch(value, pat):
matched.add(file_hash)
@@ -690,14 +747,22 @@ class Folder(Store):
hashes = set(api.get_file_hashes_by_path_pattern(like_pattern) or set())
try:
title_rows = api.get_files_by_namespace_pattern(f"title:{like_pattern}", url_fetch_limit)
hashes.update({row[0] for row in (title_rows or []) if row and row[0]})
title_rows = api.get_files_by_namespace_pattern(
f"title:{like_pattern}", url_fetch_limit
)
hashes.update(
{row[0] for row in (title_rows or []) if row and row[0]}
)
except Exception:
pass
try:
simple_rows = api.get_files_by_simple_tag_pattern(like_pattern, url_fetch_limit)
hashes.update({row[0] for row in (simple_rows or []) if row and row[0]})
simple_rows = api.get_files_by_simple_tag_pattern(
like_pattern, url_fetch_limit
)
hashes.update(
{row[0] for row in (simple_rows or []) if row and row[0]}
)
except Exception:
pass
@@ -707,7 +772,9 @@ class Folder(Store):
matching_hashes: set[str] | None = None
for token in tokens:
hashes = _ids_for_token(token)
matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes
matching_hashes = (
hashes if matching_hashes is None else matching_hashes & hashes
)
if not matching_hashes:
return results
@@ -734,7 +801,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -775,7 +842,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -830,7 +897,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -859,7 +926,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -882,7 +949,9 @@ class Folder(Store):
for tag in tags:
tag_lower = tag.lower()
if tag_lower.startswith(f"{namespace}:"):
value = _normalize_namespace_text(tag_lower[len(namespace) + 1 :], allow_wildcards=False)
value = _normalize_namespace_text(
tag_lower[len(namespace) + 1 :], allow_wildcards=False
)
pat = _normalize_namespace_text(pattern, allow_wildcards=True)
if fnmatch(value, pat):
if ext_hashes is not None and file_hash not in ext_hashes:
@@ -892,9 +961,11 @@ class Folder(Store):
if size_bytes is None:
size_bytes = file_path.stat().st_size
all_tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
entry = _create_entry(
file_path, all_tags, size_bytes, file_hash
)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
@@ -913,7 +984,9 @@ class Folder(Store):
# - title: namespace tag
# - non-namespaced tag
# Other namespaces (artist:, series:, etc.) are excluded unless explicitly queried.
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
terms = [
t.strip() for t in query_lower.replace(",", " ").split() if t.strip()
]
if not terms:
terms = [query_lower]
@@ -928,26 +1001,40 @@ class Folder(Store):
term_hashes: set[str] = set()
try:
term_hashes.update(api.get_file_hashes_by_path_pattern(like_pattern))
term_hashes.update(
api.get_file_hashes_by_path_pattern(like_pattern)
)
except Exception:
pass
try:
title_rows = api.get_files_by_namespace_pattern(f"title:{like_pattern}", fetch_limit)
term_hashes.update({row[0] for row in (title_rows or []) if row and row[0]})
title_rows = api.get_files_by_namespace_pattern(
f"title:{like_pattern}", fetch_limit
)
term_hashes.update(
{row[0] for row in (title_rows or []) if row and row[0]}
)
except Exception:
pass
try:
simple_rows = api.get_files_by_simple_tag_pattern(like_pattern, fetch_limit)
term_hashes.update({row[0] for row in (simple_rows or []) if row and row[0]})
simple_rows = api.get_files_by_simple_tag_pattern(
like_pattern, fetch_limit
)
term_hashes.update(
{row[0] for row in (simple_rows or []) if row and row[0]}
)
except Exception:
pass
if ext_hashes is not None:
term_hashes &= ext_hashes
matching_hashes = term_hashes if matching_hashes is None else (matching_hashes & term_hashes)
matching_hashes = (
term_hashes
if matching_hashes is None
else (matching_hashes & term_hashes)
)
if not matching_hashes:
return results
@@ -969,7 +1056,7 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry_obj = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry_obj["ext"] = db_ext
except Exception:
@@ -992,14 +1079,16 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
db_ext = str(ext or "").strip().lstrip(".")
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
backend_label = str(
getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder"
)
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
@@ -1012,7 +1101,6 @@ class Folder(Store):
log(f"❌ Local search failed: {exc}", file=sys.stderr)
raise
def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]:
"""Return the library root containing medios-macina.db.
@@ -1096,9 +1184,12 @@ class Folder(Store):
# Query metadata directly from database
cursor = api.get_cursor()
cursor.execute("""
cursor.execute(
"""
SELECT * FROM metadata WHERE hash = ?
""", (file_hash_result,))
""",
(file_hash_result,),
)
row = cursor.fetchone()
if not row:
@@ -1112,12 +1203,12 @@ class Folder(Store):
metadata.pop("file_path", None)
# Parse JSON fields
for field in ['url', 'relationships']:
for field in ["url", "relationships"]:
if metadata.get(field):
try:
metadata[field] = json.loads(metadata[field])
except (json.JSONDecodeError, TypeError):
metadata[field] = [] if field == 'url' else []
metadata[field] = [] if field == "url" else []
return metadata
except Exception as exc:
@@ -1131,6 +1222,7 @@ class Folder(Store):
Tuple of (tags_list, store_name) where store_name is the actual store name
"""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
@@ -1140,7 +1232,11 @@ class Folder(Store):
if db_tags:
# Return actual store name instead of generic "local_db"
store_name = self._name if self._name else "local"
return [str(t).strip().lower() for t in db_tags if isinstance(t, str) and t.strip()], store_name
return [
str(t).strip().lower()
for t in db_tags
if isinstance(t, str) and t.strip()
], store_name
except Exception as exc:
debug(f"Local DB lookup failed: {exc}")
return [], "unknown"
@@ -1155,17 +1251,22 @@ class Folder(Store):
Returns True if tags were successfully added.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
try:
with API_folder_store(Path(self._location)) as db:
existing_tags = [t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()]
existing_tags = [
t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()
]
from metadata import compute_namespaced_tag_overwrite
_to_remove, _to_add, merged = compute_namespaced_tag_overwrite(existing_tags, tag or [])
_to_remove, _to_add, merged = compute_namespaced_tag_overwrite(
existing_tags, tag or []
)
if not _to_remove and not _to_add:
return True
@@ -1196,12 +1297,17 @@ class Folder(Store):
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Remove tags from a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
tag_list = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
tag_list = [
str(t).strip().lower()
for t in (tags or [])
if isinstance(t, str) and str(t).strip()
]
if not tag_list:
return True
db.remove_tags_from_hash(file_hash, tag_list)
@@ -1216,11 +1322,13 @@ class Folder(Store):
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
urls = normalize_urls(meta.get("url"))
@@ -1235,11 +1343,13 @@ class Folder(Store):
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Add known url to a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = normalize_urls(meta.get("url"))
@@ -1267,6 +1377,7 @@ class Folder(Store):
This is a performance optimization used by cmdlets that receive many PipeObjects.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
@@ -1278,7 +1389,7 @@ class Folder(Store):
normalize_urls = None # type: ignore
merged_by_hash: Dict[str, List[str]] = {}
for file_identifier, url_list in (items or []):
for file_identifier, url_list in items or []:
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
@@ -1315,7 +1426,9 @@ class Folder(Store):
# Ensure metadata rows exist (may be needed for older entries).
for file_hash in merged_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
cursor.execute(
"INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,)
)
except Exception:
continue
@@ -1329,7 +1442,9 @@ class Folder(Store):
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
cursor.execute(
f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk
)
rows = cursor.fetchall() or []
except Exception:
rows = []
@@ -1353,7 +1468,9 @@ class Folder(Store):
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
parsed_urls = [
str(u).strip() for u in parsed if str(u).strip()
]
except Exception:
parsed_urls = []
@@ -1388,11 +1505,13 @@ class Folder(Store):
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete known url from a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = normalize_urls(meta.get("url"))
@@ -1413,6 +1532,7 @@ class Folder(Store):
def delete_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Delete known urls from many local files in one DB session."""
from API.folder import API_folder_store
try:
if not self._location:
return False
@@ -1423,7 +1543,7 @@ class Folder(Store):
normalize_urls = None # type: ignore
remove_by_hash: Dict[str, set[str]] = {}
for file_identifier, url_list in (items or []):
for file_identifier, url_list in items or []:
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
@@ -1456,7 +1576,9 @@ class Folder(Store):
# Ensure metadata rows exist.
for file_hash in remove_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
cursor.execute(
"INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,)
)
except Exception:
continue
@@ -1470,7 +1592,9 @@ class Folder(Store):
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
cursor.execute(
f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk
)
rows = cursor.fetchall() or []
except Exception:
rows = []
@@ -1494,7 +1618,9 @@ class Folder(Store):
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
parsed_urls = [
str(u).strip() for u in parsed if str(u).strip()
]
except Exception:
parsed_urls = []
@@ -1526,6 +1652,7 @@ class Folder(Store):
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return {}
@@ -1547,6 +1674,7 @@ class Folder(Store):
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
"""Set a named note for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return False
@@ -1576,13 +1704,14 @@ class Folder(Store):
map to a file path that exists on disk.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
# Normalize input.
normalized: List[tuple[str, str, str]] = []
for file_identifier, name, text in (items or []):
for file_identifier, name, text in items or []:
file_hash = str(file_identifier or "").strip().lower()
note_name = str(name or "").strip()
note_text = str(text or "")
@@ -1609,7 +1738,10 @@ class Folder(Store):
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})", chunk)
cursor.execute(
f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})",
chunk,
)
rows = cursor.fetchall() or []
except Exception:
rows = []
@@ -1660,6 +1792,7 @@ class Folder(Store):
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return False
@@ -1695,6 +1828,7 @@ class Folder(Store):
True if deletion succeeded, False otherwise
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
+122 -74
View File
@@ -104,7 +104,9 @@ class HydrusNetwork(Store):
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError("Hydrus /verify_access_key returned an unexpected response")
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
)
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
except Exception as exc:
@@ -222,7 +224,11 @@ class HydrusNetwork(Store):
tag_list = [title_tag] + list(tag_list)
# Hydrus is lowercase-only tags; normalize here for consistency.
tag_list = [str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip()]
tag_list = [
str(t).strip().lower()
for t in (tag_list or [])
if isinstance(t, str) and str(t).strip()
]
try:
# Compute file hash
@@ -307,14 +313,19 @@ class HydrusNetwork(Store):
# Associate url if provided (both for new and existing files)
if url:
log(f"{self._log_prefix()} Associating {len(url)} URL(s) with file", file=sys.stderr)
log(
f"{self._log_prefix()} Associating {len(url)} URL(s) with file", file=sys.stderr
)
for url in url:
if url:
try:
client.associate_url(file_hash, str(url))
debug(f"{self._log_prefix()} Associated URL: {url}")
except Exception as exc:
log(f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr)
log(
f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}",
file=sys.stderr,
)
return file_hash
@@ -366,7 +377,9 @@ class HydrusNetwork(Store):
return out
return []
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
def _iter_url_filtered_metadata(
url_value: str | None, want_any: bool, fetch_limit: int
) -> list[dict[str, Any]]:
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
# First try a fast system predicate if Hydrus supports it.
@@ -382,7 +395,11 @@ class HydrusNetwork(Store):
)
ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
if isinstance(ids, list):
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit()]
candidate_file_ids = [
int(x)
for x in ids
if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
]
except Exception:
candidate_file_ids = []
@@ -451,7 +468,7 @@ class HydrusNetwork(Store):
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = str(value or "").strip().lower().lstrip(".")
v = "".join(ch for ch in v if ch.isalnum())
return v
@@ -464,7 +481,7 @@ class HydrusNetwork(Store):
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
query = query_lower
if ext_filter and not query_lower:
query = "*"
@@ -486,21 +503,33 @@ class HydrusNetwork(Store):
pattern = pattern.strip()
if namespace == "url":
if not pattern or pattern == "*":
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
metadata_list = _iter_url_filtered_metadata(
None, want_any=True, fetch_limit=int(limit) if limit else 100
)
else:
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
try:
if pattern.startswith("http://") or pattern.startswith("https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": pattern})
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={"url": pattern},
)
response = client._perform_request(spec) # type: ignore[attr-defined]
hashes: list[str] = []
file_ids: list[int] = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get("file_hashes")
raw_hashes = response.get("hashes") or response.get(
"file_hashes"
)
if isinstance(raw_hashes, list):
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
hashes = [
str(h).strip()
for h in raw_hashes
if isinstance(h, str) and str(h).strip()
]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
@@ -518,7 +547,11 @@ class HydrusNetwork(Store):
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
metas = (
payload.get("metadata", [])
if isinstance(payload, dict)
else []
)
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
elif hashes:
@@ -530,7 +563,11 @@ class HydrusNetwork(Store):
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
metas = (
payload.get("metadata", [])
if isinstance(payload, dict)
else []
)
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
except Exception:
@@ -538,7 +575,9 @@ class HydrusNetwork(Store):
# Fallback: substring scan
if metadata_list is None:
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
metadata_list = _iter_url_filtered_metadata(
pattern, want_any=False, fetch_limit=int(limit) if limit else 100
)
# Parse the query into tags
# "*" means "match all" - use system:everything tag in Hydrus
@@ -553,7 +592,7 @@ class HydrusNetwork(Store):
if query.strip() == "*":
tags = ["system:everything"]
elif ':' in query_lower:
elif ":" in query_lower:
tags = [query_lower]
else:
freeform_union_search = True
@@ -584,7 +623,9 @@ class HydrusNetwork(Store):
except (TypeError, ValueError):
continue
if isinstance(raw_hashes, list):
hashes_out = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
hashes_out = [
str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()
]
return ids_out, hashes_out
if metadata_list is None:
@@ -635,9 +676,7 @@ class HydrusNetwork(Store):
return []
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
tags=tags, return_hashes=True, return_file_ids=True
)
file_ids, hashes = _extract_search_ids(search_result)
@@ -676,12 +715,12 @@ class HydrusNetwork(Store):
if not isinstance(meta, dict):
continue
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip('.')
ext = str(meta.get("ext") or "").strip().lstrip(".")
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip('.')
ext = str(info.get("ext", "")).strip().lstrip(".")
break
if ext:
break
@@ -696,6 +735,7 @@ class HydrusNetwork(Store):
all_tags: list[str] = []
title = f"Hydrus File {file_id}"
if isinstance(tags_set, dict):
def _collect(tag_list: Any) -> None:
nonlocal title
if not isinstance(tag_list, list):
@@ -708,7 +748,10 @@ class HydrusNetwork(Store):
if not tag_l:
continue
all_tags.append(tag_l)
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
if (
tag_l.startswith("title:")
and title == f"Hydrus File {file_id}"
):
title = tag_l.split(":", 1)[1].strip()
for _service_name, service_tags in tags_set.items():
@@ -858,12 +901,12 @@ class HydrusNetwork(Store):
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip('.')
ext = str(meta.get("ext") or "").strip().lstrip(".")
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip('.')
ext = str(info.get("ext", "")).strip().lstrip(".")
break
if ext:
break
@@ -871,13 +914,14 @@ class HydrusNetwork(Store):
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ':' in query_lower
has_namespace = ":" in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
results.append(
{
"hash": hash_hex,
"url": file_url,
"name": title,
@@ -889,11 +933,14 @@ class HydrusNetwork(Store):
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
}
)
else:
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)]
freeform_tags = [
t for t in all_tags if isinstance(t, str) and t and (":" not in t)
]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
match = True
@@ -905,7 +952,8 @@ class HydrusNetwork(Store):
if match:
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
results.append(
{
"hash": hash_hex,
"url": file_url,
"name": title,
@@ -917,7 +965,8 @@ class HydrusNetwork(Store):
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
}
)
debug(f"{prefix} {len(results)} result(s)")
if ext_filter:
@@ -936,6 +985,7 @@ class HydrusNetwork(Store):
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
raise
@@ -949,9 +999,11 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")
# Build browser URL with access key
base_url = str(self.URL).rstrip('/')
base_url = str(self.URL).rstrip("/")
access_key = str(self.API)
browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
browser_url = (
f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
)
debug(f"{self._log_prefix()} get_file: url={browser_url}")
return browser_url
@@ -972,7 +1024,9 @@ class HydrusNetwork(Store):
return False
reason = kwargs.get("reason")
reason_text = str(reason).strip() if isinstance(reason, str) and reason.strip() else None
reason_text = (
str(reason).strip() if isinstance(reason, str) and reason.strip() else None
)
# 1) Delete file
client.delete_files([file_hash], reason=reason_text)
@@ -1041,7 +1095,9 @@ class HydrusNetwork(Store):
# Hydrus may return mime as an int enum, or sometimes a human label.
mime_val = meta.get("mime")
filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")
filetype_human = (
meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")
)
# Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
# then title suffix, then file path suffix.
@@ -1114,18 +1170,15 @@ class HydrusNetwork(Store):
except Exception:
dur_int = None
raw_urls = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
url_list: list[str] = []
if isinstance(raw_urls, str):
s = raw_urls.strip()
url_list = [s] if s else []
elif isinstance(raw_urls, list):
url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]
url_list = [
str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()
]
return {
"hash": file_hash,
@@ -1171,9 +1224,7 @@ class HydrusNetwork(Store):
# Fetch file metadata
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=True,
include_file_url=False
hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=False
)
items = payload.get("metadata") if isinstance(payload, dict) else None
@@ -1193,15 +1244,16 @@ class HydrusNetwork(Store):
# Extract tags from metadata
tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus"
return [
str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()
], "hydrus"
except Exception as exc:
debug(f"{self._log_prefix()} get_tags failed: {exc}")
return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Add tags to a Hydrus file.
"""
"""Add tags to a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1214,7 +1266,11 @@ class HydrusNetwork(Store):
return False
service_name = kwargs.get("service_name") or "my tags"
incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
incoming_tags = [
str(t).strip().lower()
for t in (tags or [])
if isinstance(t, str) and str(t).strip()
]
if not incoming_tags:
return True
@@ -1225,7 +1281,9 @@ class HydrusNetwork(Store):
from metadata import compute_namespaced_tag_overwrite
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags)
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
existing_tags, incoming_tags
)
if not tags_to_add and not tags_to_remove:
return True
@@ -1250,8 +1308,7 @@ class HydrusNetwork(Store):
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Delete tags from a Hydrus file.
"""
"""Delete tags from a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1264,7 +1321,9 @@ class HydrusNetwork(Store):
return False
service_name = kwargs.get("service_name") or "my tags"
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()]
tag_list = [
str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()
]
if not tag_list:
return False
client.delete_tag(file_hash, tag_list, service_name)
@@ -1274,13 +1333,9 @@ class HydrusNetwork(Store):
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a Hydrus file.
"""
"""Get known url for a Hydrus file."""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} get_url: client unavailable")
return []
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
@@ -1292,12 +1347,7 @@ class HydrusNetwork(Store):
return []
meta = items[0] if isinstance(items[0], dict) else {}
raw_urls: Any = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
raw_urls: Any = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if isinstance(raw_urls, str):
val = raw_urls.strip()
return [val] if val else []
@@ -1316,8 +1366,7 @@ class HydrusNetwork(Store):
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Associate one or more url with a Hydrus file.
"""
"""Associate one or more url with a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1344,11 +1393,11 @@ class HydrusNetwork(Store):
return False
any_success = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
for u in (urls or []):
for u in urls or []:
s = str(u or "").strip()
if not s:
continue
@@ -1363,8 +1412,7 @@ class HydrusNetwork(Store):
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete one or more url from a Hydrus file.
"""
"""Delete one or more url from a Hydrus file."""
try:
client = self._client
if client is None:
@@ -1453,9 +1501,7 @@ class HydrusNetwork(Store):
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str, Any],
service_key: Optional[str],
service_name: str
meta: Dict[str, Any], service_key: Optional[str], service_name: str
) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
@@ -1482,6 +1528,8 @@ class HydrusNetwork(Store):
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]
return [
str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()
]
return []
+6 -4
View File
@@ -57,7 +57,7 @@ class Store(ABC):
Default behavior is to call add_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
try:
ok = self.add_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
@@ -72,7 +72,7 @@ class Store(ABC):
Default behavior is to call delete_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
for file_identifier, urls in items or []:
try:
ok = self.delete_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
@@ -87,7 +87,7 @@ class Store(ABC):
Default behavior is to call set_note() per file.
"""
changed_any = False
for file_identifier, name, text in (items or []):
for file_identifier, name, text in items or []:
try:
ok = self.set_note(file_identifier, name, text, **kwargs)
changed_any = changed_any or bool(ok)
@@ -112,7 +112,9 @@ class Store(ABC):
"""Add or replace a named note for a file."""
raise NotImplementedError
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Optional hook for handling `@N` selection semantics.
Return True if the selection was handled and default behavior should be skipped.
+16 -5
View File
@@ -88,7 +88,9 @@ def _required_keys_for(store_cls: Type[BaseStore]) -> list[str]:
raise TypeError(f"Unsupported __new__.keys type for {store_cls.__name__}: {type(keys)}")
def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_config: Any) -> Dict[str, Any]:
def _build_kwargs(
store_cls: Type[BaseStore], instance_name: str, instance_config: Any
) -> Dict[str, Any]:
if isinstance(instance_config, dict):
cfg_dict = dict(instance_config)
else:
@@ -97,7 +99,10 @@ def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_confi
required = _required_keys_for(store_cls)
# If NAME is required but not present, allow the instance key to provide it.
if any(_normalize_config_key(k) == "NAME" for k in required) and _get_case_insensitive(cfg_dict, "NAME") is None:
if (
any(_normalize_config_key(k) == "NAME" for k in required)
and _get_case_insensitive(cfg_dict, "NAME") is None
):
cfg_dict["NAME"] = str(instance_name)
kwargs: Dict[str, Any] = {}
@@ -116,14 +121,18 @@ def _build_kwargs(store_cls: Type[BaseStore], instance_name: str, instance_confi
class Store:
def __init__(
    self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False
) -> None:
    """Create the store registry and load its backends.

    Args:
        config: Configuration mapping. A falsy value (``None`` or an empty
            dict) is replaced by a fresh empty dict.
        suppress_debug: Stored as ``_suppress_debug``; how it is consumed is
            not visible in this block.
    """
    self._config = config or {}
    self._suppress_debug = suppress_debug
    # Both registries start empty. `_load_backends()` runs last so that every
    # attribute above already exists when it is called (presumably it fills
    # the registries -- confirm against its definition).
    self._backends: Dict[str, BaseStore] = {}
    self._backend_errors: Dict[str, str] = {}
    self._load_backends()
def _maybe_register_temp_alias(self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore) -> None:
def _maybe_register_temp_alias(
self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore
) -> None:
"""If a folder backend points at config['temp'], also expose it as the 'temp' backend.
This keeps config compatibility (e.g. existing 'default') while presenting the temp
@@ -236,7 +245,9 @@ class Store:
def __getitem__(self, backend_name: str) -> BaseStore:
    """Return the backend registered under ``backend_name``.

    Args:
        backend_name: Key to look up in ``self._backends``.

    Raises:
        KeyError: If no backend is registered under that name; the message
            lists the names that are registered.
    """
    if backend_name not in self._backends:
        raise KeyError(
            f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}"
        )
    return self._backends[backend_name]
def is_available(self, backend_name: str) -> bool:
+10 -3
View File
@@ -103,7 +103,10 @@ class AccessModal(ModalScreen):
# Also show the URL for reference/copying
yield Label(self.item_content, classes="access-label")
yield Label("\n[yellow]↑ Click the link above to open on your device[/yellow]", classes="access-label")
yield Label(
"\n[yellow]↑ Click the link above to open on your device[/yellow]",
classes="access-label",
)
with Horizontal(id="access-footer"):
yield Button("Copy URL", id="copy-btn", variant="primary")
@@ -118,19 +121,23 @@ class AccessModal(ModalScreen):
# Try to use pyperclip if available
try:
import pyperclip
pyperclip.copy(self.item_content)
logger.info("URL copied to clipboard via pyperclip")
except ImportError:
# Fallback: try xclip on Linux or pbcopy on Mac
import subprocess
import sys
if sys.platform == "win32":
# Windows: use clipboard via pyperclip (already tried)
logger.debug("Windows clipboard not available without pyperclip")
else:
# Linux/Mac
process = subprocess.Popen(['xclip', '-selection', 'clipboard'], stdin=subprocess.PIPE)
process.communicate(self.item_content.encode('utf-8'))
process = subprocess.Popen(
["xclip", "-selection", "clipboard"], stdin=subprocess.PIPE
)
process.communicate(self.item_content.encode("utf-8"))
logger.info("URL copied to clipboard via xclip")
except Exception as e:
logger.debug(f"Clipboard copy not available: {e}")
File diff suppressed because it is too large Load Diff
+139 -68
View File
@@ -30,7 +30,12 @@ class ExportModal(ModalScreen):
CSS_PATH = "export.tcss"
def __init__(self, result_data: Optional[dict] = None, hydrus_available: bool = False, debrid_available: bool = False):
def __init__(
self,
result_data: Optional[dict] = None,
hydrus_available: bool = False,
debrid_available: bool = False,
):
"""Initialize the export modal with result data.
Args:
@@ -67,7 +72,7 @@ class ExportModal(ModalScreen):
Returns:
Tuple of (file_type, format_options) where format_options is a list of (label, value) tuples
"""
ext_lower = ext.lower() if ext else ''
ext_lower = ext.lower() if ext else ""
from SYS.utils_constant import mime_maps
@@ -103,7 +108,13 @@ class ExportModal(ModalScreen):
options = [("Local", "local")]
try:
from config import load_config, get_hydrus_access_key, get_hydrus_url, get_debrid_api_key
from config import (
load_config,
get_hydrus_access_key,
get_hydrus_url,
get_debrid_api_key,
)
config = load_config()
hydrus_url = (get_hydrus_url(config, "home") or "").strip()
@@ -122,19 +133,29 @@ class ExportModal(ModalScreen):
def _get_metadata_text(self) -> str:
"""Format metadata from result data in a consistent display format."""
metadata = self.result_data.get('metadata', {})
source = self.result_data.get('source', 'unknown')
logger.info(f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}")
metadata = self.result_data.get("metadata", {})
source = self.result_data.get("source", "unknown")
logger.info(
f"_get_metadata_text called - source: {source}, metadata type: {type(metadata)}, keys: {list(metadata.keys()) if metadata else 'empty'}"
)
if not metadata:
logger.info(f"_get_metadata_text - No metadata found, returning 'No metadata available'")
logger.info(
f"_get_metadata_text - No metadata found, returning 'No metadata available'"
)
return "No metadata available"
lines = []
# Only display these specific fields in this order
display_fields = [
'duration', 'size', 'ext', 'media_type', 'time_imported', 'time_modified', 'hash'
"duration",
"size",
"ext",
"media_type",
"time_imported",
"time_modified",
"hash",
]
# Display fields in a consistent order
@@ -147,7 +168,7 @@ class ExportModal(ModalScreen):
# Use central formatting rule
formatted_value = format_metadata_value(field, value)
# Format: "Field Name: value"
field_label = field.replace('_', ' ').title()
field_label = field.replace("_", " ").title()
lines.append(f"{field_label}: {formatted_value}")
# If we found any fields, display them
@@ -185,29 +206,25 @@ class ExportModal(ModalScreen):
# Export To selector
self.export_to_select = Select(
[("0x0", "0x0"), ("Libraries", "libraries"), ("Custom Path", "path")],
id="export-to-select"
id="export-to-select",
)
yield self.export_to_select
# Libraries selector (initially hidden)
library_options = self._get_library_options()
self.libraries_select = Select(
library_options,
id="libraries-select"
)
self.libraries_select = Select(library_options, id="libraries-select")
yield self.libraries_select
# Custom path input (initially hidden)
self.custom_path_input = Input(
placeholder="Enter custom export path",
id="custom-path-input"
placeholder="Enter custom export path", id="custom-path-input"
)
yield self.custom_path_input
# Get metadata for size and format options
metadata = self.result_data.get('metadata', {})
original_size = metadata.get('size', '')
ext = metadata.get('ext', '')
metadata = self.result_data.get("metadata", {})
original_size = metadata.get("size", "")
ext = metadata.get("ext", "")
# Store the extension and determine file type
self.file_ext = ext
@@ -215,7 +232,11 @@ class ExportModal(ModalScreen):
# Format size in MB for display
if original_size:
size_mb = int(original_size / (1024 * 1024)) if isinstance(original_size, (int, float)) else original_size
size_mb = (
int(original_size / (1024 * 1024))
if isinstance(original_size, (int, float))
else original_size
)
size_display = f"{size_mb}Mb"
else:
size_display = ""
@@ -225,7 +246,9 @@ class ExportModal(ModalScreen):
value=size_display,
placeholder="Size (can reduce)",
id="size-input",
disabled=(self.file_type == 'document') # Disable for documents - no resizing needed
disabled=(
self.file_type == "document"
), # Disable for documents - no resizing needed
)
yield self.size_input
@@ -233,17 +256,23 @@ class ExportModal(ModalScreen):
default_format = None
if ext and format_options:
# Map extension to format value (e.g., .flac -> "flac", .mp3 -> "mp3", .m4a -> "m4a")
ext_lower = ext.lower().lstrip('.') # Remove leading dot if present
ext_lower = ext.lower().lstrip(".") # Remove leading dot if present
# Try to find matching format option
for _, value in format_options:
if value and (ext_lower == value or f".{ext_lower}" == ext or ext.endswith(f".{value}")):
if value and (
ext_lower == value
or f".{ext_lower}" == ext
or ext.endswith(f".{value}")
):
default_format = value
logger.debug(f"Matched extension {ext} to format {value}")
break
# If no exact match, use first option
if not default_format and format_options:
default_format = format_options[0][1]
logger.debug(f"No format match for {ext}, using first option: {default_format}")
logger.debug(
f"No format match for {ext}, using first option: {default_format}"
)
# Store the default format to apply after mount
self.default_format = default_format
@@ -252,7 +281,7 @@ class ExportModal(ModalScreen):
self.format_select = Select(
format_options if format_options else [("No conversion", "")],
id="format-select",
disabled=not format_options # Disable if no format options (e.g., documents)
disabled=not format_options, # Disable if no format options (e.g., documents)
)
yield self.format_select
@@ -263,14 +292,14 @@ class ExportModal(ModalScreen):
def _format_tags(self) -> str:
"""Format tags from result data."""
tags = self.result_data.get('tags', '')
tags = self.result_data.get("tags", "")
if isinstance(tags, str):
# Split by comma and rejoin with newlines
tags_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
return '\n'.join(tags_list)
tags_list = [tag.strip() for tag in tags.split(",") if tag.strip()]
return "\n".join(tags_list)
elif isinstance(tags, list):
return '\n'.join(tags)
return ''
return "\n".join(tags)
return ""
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button press events."""
@@ -286,9 +315,9 @@ class ExportModal(ModalScreen):
if event.control.id == "export-to-select":
# Show/hide custom path and libraries based on selection
if self.custom_path_input:
self.custom_path_input.display = (event.value == "path")
self.custom_path_input.display = event.value == "path"
if self.libraries_select:
self.libraries_select.display = (event.value == "libraries")
self.libraries_select.display = event.value == "libraries"
elif event.control.id == "libraries-select":
# Handle library selection (no special action needed currently)
logger.debug(f"Library selected: {event.value}")
@@ -310,7 +339,10 @@ class ExportModal(ModalScreen):
if self.metadata_display:
metadata_text = self._get_metadata_text()
self.metadata_display.update(metadata_text)
logger.debug(f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}")
logger.debug(
f"Updated metadata display on mount: {bool(self.result_data.get('metadata'))}"
)
def _handle_export(self) -> None:
"""Handle the export action."""
try:
@@ -337,38 +369,70 @@ class ExportModal(ModalScreen):
# Parse tags from textarea (one per line)
export_tags = set()
for line in tags_text.split('\n'):
for line in tags_text.split("\n"):
tag = line.strip()
if tag:
export_tags.add(tag)
# For Hydrus export, filter out metadata-only tags (hash:, url:, relationship:)
if export_to == "libraries" and library == "hydrus":
metadata_prefixes = {'hash:', 'url:', 'relationship:'}
export_tags = {tag for tag in export_tags if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)}
logger.info(f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining")
metadata_prefixes = {"hash:", "url:", "relationship:"}
export_tags = {
tag
for tag in export_tags
if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)
}
logger.info(
f"Filtered tags for Hydrus - removed metadata tags, {len(export_tags)} tags remaining"
)
# Extract title and add as searchable tags if not already present
title = self.result_data.get('title', '').strip()
title = self.result_data.get("title", "").strip()
if title:
# Add the full title as a tag if not already present
title_tag = f"title:{title}"
if title_tag not in export_tags and not any(t.startswith('title:') for t in export_tags):
if title_tag not in export_tags and not any(
t.startswith("title:") for t in export_tags
):
export_tags.add(title_tag)
# Extract individual words from title as searchable tags (if reasonable length)
# Skip very short words and common stop words
if len(title) < 100: # Only for reasonably short titles
stop_words = {'the', 'a', 'an', 'and', 'or', 'of', 'in', 'to', 'for', 'is', 'it', 'at', 'by', 'from', 'with', 'as', 'be', 'on', 'that', 'this', 'this'}
stop_words = {
"the",
"a",
"an",
"and",
"or",
"of",
"in",
"to",
"for",
"is",
"it",
"at",
"by",
"from",
"with",
"as",
"be",
"on",
"that",
"this",
"this",
}
words = title.lower().split()
for word in words:
# Clean up word (remove punctuation)
clean_word = ''.join(c for c in word if c.isalnum())
clean_word = "".join(c for c in word if c.isalnum())
# Only add if not a stop word and has some length
if clean_word and len(clean_word) > 2 and clean_word not in stop_words:
if clean_word not in export_tags:
export_tags.add(clean_word)
logger.info(f"Extracted {len(words)} words from title, added searchable title tags")
logger.info(
f"Extracted {len(words)} words from title, added searchable title tags"
)
# Validate required fields - allow export to continue for Hydrus even with 0 actual tags
# (metadata tags will still be in the sidecar, and tags can be added later)
@@ -393,31 +457,33 @@ class ExportModal(ModalScreen):
export_path = export_to # "0x0"
# Get metadata from result_data
metadata = self.result_data.get('metadata', {})
metadata = self.result_data.get("metadata", {})
# Extract file source info from result_data (passed by hub-ui)
file_hash = self.result_data.get('hash')
file_url = self.result_data.get('url')
file_path = self.result_data.get('path')
source = self.result_data.get('source', 'unknown')
file_hash = self.result_data.get("hash")
file_url = self.result_data.get("url")
file_path = self.result_data.get("path")
source = self.result_data.get("source", "unknown")
# Prepare export data
export_data = {
'export_to': export_to,
'export_path': export_path,
'library': library if export_to == "libraries" else None,
'tags': export_tags,
'size': size if size else None,
'format': file_format,
'metadata': metadata,
'original_data': self.result_data,
'hash': file_hash,
'url': file_url,
'path': file_path,
'source': source,
"export_to": export_to,
"export_path": export_path,
"library": library if export_to == "libraries" else None,
"tags": export_tags,
"size": size if size else None,
"format": file_format,
"metadata": metadata,
"original_data": self.result_data,
"hash": file_hash,
"url": file_url,
"path": file_path,
"source": source,
}
logger.info(f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}")
logger.info(
f"Export initiated: destination={export_path}, format={file_format}, size={size}, tags={export_tags}, source={source}, hash={file_hash}, path={file_path}"
)
# Dismiss the modal and return the export data
self.dismiss(export_data)
@@ -430,7 +496,6 @@ class ExportModal(ModalScreen):
self.dismiss(None)
def create_notes_sidecar(file_path: Path, notes: str) -> None:
"""Create a .notes sidecar file with notes text.
@@ -443,9 +508,9 @@ def create_notes_sidecar(file_path: Path, notes: str) -> None:
if not notes or not notes.strip():
return
notes_path = file_path.with_suffix(file_path.suffix + '.notes')
notes_path = file_path.with_suffix(file_path.suffix + ".notes")
try:
with open(notes_path, 'w', encoding='utf-8') as f:
with open(notes_path, "w", encoding="utf-8") as f:
f.write(notes.strip())
logger.info(f"Created notes sidecar: {notes_path}")
except Exception as e:
@@ -463,18 +528,24 @@ def determine_needs_conversion(current_ext: str, target_format: str) -> bool:
True if conversion is needed, False if it's already the target format
"""
# Handle NoSelection or None
if not target_format or target_format == "" or str(target_format.__class__.__name__) == 'NoSelection':
if (
not target_format
or target_format == ""
or str(target_format.__class__.__name__) == "NoSelection"
):
return False # No conversion requested
# Normalize the current extension
current_ext_lower = current_ext.lower().lstrip('.')
current_ext_lower = current_ext.lower().lstrip(".")
target_format_lower = str(target_format).lower()
# Check if they match
return current_ext_lower != target_format_lower
def calculate_size_tolerance(metadata: dict, user_size_mb: Optional[str]) -> tuple[Optional[int], Optional[int]]:
def calculate_size_tolerance(
metadata: dict, user_size_mb: Optional[str]
) -> tuple[Optional[int], Optional[int]]:
"""Calculate target size with 1MB grace period.
Args:
@@ -493,9 +564,9 @@ def calculate_size_tolerance(metadata: dict, user_size_mb: Optional[str]) -> tup
try:
# Parse the size string (format like "756Mb")
size_str = user_size_mb.strip().lower()
if size_str.endswith('mb'):
if size_str.endswith("mb"):
size_str = size_str[:-2]
elif size_str.endswith('m'):
elif size_str.endswith("m"):
size_str = size_str[:-1]
size_mb = float(size_str)
+37 -30
View File
@@ -34,6 +34,7 @@ class SearchModal(ModalScreen):
class SearchSelected(Message):
    """Message posted when the user picks a search result.

    Attributes:
        result: The selected search result dict, exactly as given by the poster.
    """

    def __init__(self, result: dict) -> None:
        # Attach the payload, then run the base-class initializer.
        self.result = result
        super().__init__()
@@ -66,15 +67,12 @@ class SearchModal(ModalScreen):
self.source_select = Select(
[("OpenLibrary", "openlibrary"), ("Soulseek", "soulseek")],
value="openlibrary",
id="source-select"
id="source-select",
)
yield self.source_select
# Search input
self.search_input = Input(
placeholder="Enter search query...",
id="search-input"
)
self.search_input = Input(placeholder="Enter search query...", id="search-input")
yield self.search_input
# Search button
@@ -89,9 +87,7 @@ class SearchModal(ModalScreen):
# Left column: Tags textarea
with Vertical(id="tags-column"):
self.tags_textarea = TextArea(
text="",
id="result-tags-textarea",
read_only=False
text="", id="result-tags-textarea", read_only=False
)
self.tags_textarea.border_title = "Tags [Ctrl+T: Scrape]"
yield self.tags_textarea
@@ -102,7 +98,7 @@ class SearchModal(ModalScreen):
self.library_source_select = Select(
[("Local", "local"), ("Download", "download")],
value="local",
id="library-source-select"
id="library-source-select",
)
yield self.library_source_select
@@ -118,12 +114,7 @@ class SearchModal(ModalScreen):
def on_mount(self) -> None:
"""Set up the table columns and focus."""
# Set up results table columns
self.results_table.add_columns(
"Title",
"Author/Artist",
"Year/Album",
"Details"
)
self.results_table.add_columns("Title", "Author/Artist", "Year/Album", "Details")
# Focus on search input
self.search_input.focus()
@@ -149,11 +140,11 @@ class SearchModal(ModalScreen):
self.is_searching = True
# Create worker for tracking
if self.app_instance and hasattr(self.app_instance, 'create_worker'):
if self.app_instance and hasattr(self.app_instance, "create_worker"):
self.current_worker = self.app_instance.create_worker(
source,
title=f"{source.capitalize()} Search: {query[:40]}",
description=f"Searching {source} for: {query}"
description=f"Searching {source} for: {query}",
)
self.current_worker.log_step(f"Connecting to {source}...")
@@ -183,7 +174,13 @@ class SearchModal(ModalScreen):
else:
# Fallback if no columns defined
row.add_column("Title", res.title)
row.add_column("Target", getattr(res, 'path', None) or getattr(res, 'url', None) or getattr(res, 'target', None) or '')
row.add_column(
"Target",
getattr(res, "path", None)
or getattr(res, "url", None)
or getattr(res, "target", None)
or "",
)
self.current_result_table = table
@@ -239,7 +236,9 @@ class SearchModal(ModalScreen):
if getattr(result, "table", "") == "openlibrary":
asyncio.create_task(self._download_book(result))
else:
logger.warning("[search-modal] Download only supported for OpenLibrary results")
logger.warning(
"[search-modal] Download only supported for OpenLibrary results"
)
else:
logger.warning("[search-modal] No result selected for download")
@@ -251,7 +250,7 @@ class SearchModal(ModalScreen):
result = self.current_results[selected_row]
# Convert to dict if needed for submission
if hasattr(result, 'to_dict'):
if hasattr(result, "to_dict"):
result_dict = result.to_dict()
else:
result_dict = result
@@ -259,7 +258,9 @@ class SearchModal(ModalScreen):
# Get tags from textarea
tags_text = self.tags_textarea.text if self.tags_textarea else ""
# Get library source (if OpenLibrary)
library_source = self.library_source_select.value if self.library_source_select else "local"
library_source = (
self.library_source_select.value if self.library_source_select else "local"
)
# Add tags and source to result
result_dict["tags_text"] = tags_text
@@ -280,26 +281,30 @@ class SearchModal(ModalScreen):
return
# Handle both SearchResult objects and dicts
if hasattr(result, 'full_metadata'):
if hasattr(result, "full_metadata"):
metadata = result.full_metadata or {}
source = result.table
title = result.title
else:
# Handle dict (legacy or from to_dict)
if 'full_metadata' in result:
metadata = result['full_metadata'] or {}
elif 'raw_data' in result:
metadata = result['raw_data'] or {}
if "full_metadata" in result:
metadata = result["full_metadata"] or {}
elif "raw_data" in result:
metadata = result["raw_data"] or {}
else:
metadata = result
source = result.get('table', '')
title = result.get('title', '')
source = result.get("table", "")
title = result.get("title", "")
# Format tags based on result source
if source == "openlibrary":
# For OpenLibrary: title, author, year
author = ", ".join(metadata.get("authors", [])) if isinstance(metadata.get("authors"), list) else metadata.get("authors", "")
author = (
", ".join(metadata.get("authors", []))
if isinstance(metadata.get("authors"), list)
else metadata.get("authors", "")
)
year = str(metadata.get("year", ""))
tags = []
if title:
@@ -369,7 +374,9 @@ class SearchModal(ModalScreen):
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
self._populate_tags_from_result(result)
logger.info(f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}")
logger.info(
f"[search-modal] Ctrl+T: Populated tags from result at row {selected_row}"
)
else:
logger.warning(f"[search-modal] Ctrl+T: Invalid row index {selected_row}")
except Exception as e:
+135 -100
View File
@@ -28,12 +28,14 @@ class WorkersModal(ModalScreen):
class WorkerUpdated(Message):
    """Message posted whenever the worker list has been updated.

    Attributes:
        workers: The worker records (one dict per worker) carried by the message.
    """

    def __init__(self, workers: List[Dict[str, Any]]) -> None:
        # Attach the payload, then run the base-class initializer.
        self.workers = workers
        super().__init__()
class WorkerCancelled(Message):
    """Message posted when the user cancels a worker.

    Attributes:
        worker_id: Identifier of the worker that was cancelled.
    """

    def __init__(self, worker_id: str) -> None:
        # Attach the payload, then run the base-class initializer.
        self.worker_id = worker_id
        super().__init__()
@@ -97,27 +99,14 @@ class WorkersModal(ModalScreen):
# Set up running workers table
if self.running_table:
self.running_table.add_columns(
"ID",
"Type",
"Status",
"Pipe",
"Progress",
"Started",
"Details"
"ID", "Type", "Status", "Pipe", "Progress", "Started", "Details"
)
self.running_table.zebra_stripes = True
# Set up finished workers table
if self.finished_table:
self.finished_table.add_columns(
"ID",
"Type",
"Result",
"Pipe",
"Started",
"Completed",
"Duration",
"Details"
"ID", "Type", "Result", "Pipe", "Started", "Completed", "Duration", "Details"
)
self.finished_table.zebra_stripes = True
@@ -139,22 +128,30 @@ class WorkersModal(ModalScreen):
# Get running workers from app instance
# This assumes the app has a get_running_workers() method
if hasattr(self.app_instance, 'get_running_workers'):
if hasattr(self.app_instance, "get_running_workers"):
self.running_workers = self.app_instance.get_running_workers()
else:
self.running_workers = []
# Get finished workers from app instance
if hasattr(self.app_instance, 'get_finished_workers'):
if hasattr(self.app_instance, "get_finished_workers"):
self.finished_workers = self.app_instance.get_finished_workers()
if self.finished_workers:
logger.info(f"[workers-modal-refresh] Got {len(self.finished_workers)} finished workers from app")
logger.info(
f"[workers-modal-refresh] Got {len(self.finished_workers)} finished workers from app"
)
# Log the keys in the first worker to verify structure
if isinstance(self.finished_workers[0], dict):
logger.info(f"[workers-modal-refresh] First worker keys: {list(self.finished_workers[0].keys())}")
logger.info(f"[workers-modal-refresh] First worker: {self.finished_workers[0]}")
logger.info(
f"[workers-modal-refresh] First worker keys: {list(self.finished_workers[0].keys())}"
)
logger.info(
f"[workers-modal-refresh] First worker: {self.finished_workers[0]}"
)
else:
logger.warning(f"[workers-modal-refresh] First worker is not a dict: {type(self.finished_workers[0])}")
logger.warning(
f"[workers-modal-refresh] First worker is not a dict: {type(self.finished_workers[0])}"
)
else:
self.finished_workers = []
@@ -162,7 +159,9 @@ class WorkersModal(ModalScreen):
self._update_running_table()
self._update_finished_table()
logger.info(f"[workers-modal] Refreshed: {len(self.running_workers)} running, {len(self.finished_workers)} finished")
logger.info(
f"[workers-modal] Refreshed: {len(self.running_workers)} running, {len(self.finished_workers)} finished"
)
except Exception as e:
logger.error(f"[workers-modal] Error refreshing workers: {e}")
@@ -199,29 +198,33 @@ class WorkersModal(ModalScreen):
self.running_table.clear()
if not self.running_workers:
self.running_table.add_row("---", "---", "---", "---", "---", "---", "No workers running")
self.running_table.add_row(
"---", "---", "---", "---", "---", "---", "No workers running"
)
logger.debug(f"[workers-modal] No running workers to display")
return
logger.debug(f"[workers-modal] Updating running table with {len(self.running_workers)} workers")
logger.debug(
f"[workers-modal] Updating running table with {len(self.running_workers)} workers"
)
for idx, worker_info in enumerate(self.running_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
status = worker_info.get('status', 'running')
progress = worker_info.get('progress', '')
started = worker_info.get('started', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
worker_id = worker_info.get("id", "unknown")
worker_type = worker_info.get("type", "unknown")
status = worker_info.get("status", "running")
progress = worker_info.get("progress", "")
started = worker_info.get("started", "")
details = worker_info.get("details", "")
pipe = worker_info.get("pipe", "")
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
status = str(status) if status else 'running'
progress = str(progress) if progress else '---'
started = str(started) if started else '---'
details = str(details) if details else '---'
worker_id = str(worker_id) if worker_id else "unknown"
worker_type = str(worker_type) if worker_type else "unknown"
status = str(status) if status else "running"
progress = str(progress) if progress else "---"
started = str(started) if started else "---"
details = str(details) if details else "---"
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
@@ -237,15 +240,22 @@ class WorkersModal(ModalScreen):
pipe_display,
progress,
started,
details
details,
)
if idx == 0: # Log first entry
logger.debug(f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}")
logger.debug(
f"[workers-modal] Added running row {idx}: {worker_id[:8]} {worker_type[:15]} {status}"
)
except Exception as row_error:
logger.error(f"[workers-modal] Error adding running row {idx}: {row_error}", exc_info=True)
logger.error(
f"[workers-modal] Error adding running row {idx}: {row_error}",
exc_info=True,
)
logger.debug(f"[workers-modal] Updated running table with {len(self.running_workers)} workers")
logger.debug(
f"[workers-modal] Updated running table with {len(self.running_workers)} workers"
)
except Exception as e:
logger.error(f"[workers-modal] Error updating running table: {e}", exc_info=True)
@@ -259,33 +269,37 @@ class WorkersModal(ModalScreen):
self.finished_table.clear()
if not self.finished_workers:
self.finished_table.add_row("---", "---", "---", "---", "---", "---", "---", "No finished workers")
self.finished_table.add_row(
"---", "---", "---", "---", "---", "---", "---", "No finished workers"
)
logger.debug(f"[workers-modal] No finished workers to display")
return
logger.info(f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers")
logger.info(
f"[workers-modal-update] STARTING to update finished table with {len(self.finished_workers)} workers"
)
added_count = 0
error_count = 0
for idx, worker_info in enumerate(self.finished_workers):
try:
worker_id = worker_info.get('id', 'unknown')
worker_type = worker_info.get('type', 'unknown')
result = worker_info.get('result', 'unknown')
completed = worker_info.get('completed', '')
duration = worker_info.get('duration', '')
details = worker_info.get('details', '')
pipe = worker_info.get('pipe', '')
started = worker_info.get('started', '')
worker_id = worker_info.get("id", "unknown")
worker_type = worker_info.get("type", "unknown")
result = worker_info.get("result", "unknown")
completed = worker_info.get("completed", "")
duration = worker_info.get("duration", "")
details = worker_info.get("details", "")
pipe = worker_info.get("pipe", "")
started = worker_info.get("started", "")
# Ensure values are strings
worker_id = str(worker_id) if worker_id else 'unknown'
worker_type = str(worker_type) if worker_type else 'unknown'
result = str(result) if result else 'unknown'
completed = str(completed) if completed else '---'
duration = str(duration) if duration else '---'
details = str(details) if details else '---'
started = str(started) if started else '---'
worker_id = str(worker_id) if worker_id else "unknown"
worker_type = str(worker_type) if worker_type else "unknown"
result = str(result) if result else "unknown"
completed = str(completed) if completed else "---"
duration = str(duration) if duration else "---"
details = str(details) if details else "---"
started = str(started) if started else "---"
pipe_display = self._summarize_pipe(pipe)
# Truncate long strings
@@ -304,16 +318,23 @@ class WorkersModal(ModalScreen):
started,
completed,
duration,
details
details,
)
added_count += 1
except Exception as row_error:
error_count += 1
logger.error(f"[workers-modal-update] Error adding finished row {idx}: {row_error}", exc_info=True)
logger.error(
f"[workers-modal-update] Error adding finished row {idx}: {row_error}",
exc_info=True,
)
logger.info(f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})")
logger.debug(f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}")
logger.info(
f"[workers-modal-update] COMPLETED: Added {added_count}/{len(self.finished_workers)} finished workers (errors: {error_count})"
)
logger.debug(
f"[workers-modal-update] Finished table row_count after update: {self.finished_table.row_count}"
)
except Exception as e:
logger.error(f"[workers-modal] Error updating finished table: {e}", exc_info=True)
@@ -329,7 +350,9 @@ class WorkersModal(ModalScreen):
logger.debug(f"[workers-modal] Highlighted in running table")
elif event.control == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}")
logger.debug(
f"[workers-modal] Highlighted in finished table, list size: {len(workers_list)}"
)
else:
logger.warning(f"[workers-modal] Unknown table: {event.control}")
return
@@ -337,7 +360,7 @@ class WorkersModal(ModalScreen):
# Get the worker at this row
if workers_list and 0 <= event.cursor_row < len(workers_list):
worker = workers_list[event.cursor_row]
worker_id = worker.get('id', '')
worker_id = worker.get("id", "")
logger.info(f"[workers-modal] Highlighted worker: {worker_id}")
if worker_id:
@@ -345,7 +368,9 @@ class WorkersModal(ModalScreen):
# Display the stdout
self._update_stdout_display(worker_id, worker)
else:
logger.warning(f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}")
logger.warning(
f"[workers-modal] Row {event.cursor_row} out of bounds for list of size {len(workers_list) if workers_list else 0}"
)
except Exception as e:
logger.error(f"[workers-modal] Error handling row highlight: {e}", exc_info=True)
@@ -354,7 +379,9 @@ class WorkersModal(ModalScreen):
try:
# CellHighlighted has coordinate (row, column) not cursor_row
cursor_row = event.coordinate.row
logger.debug(f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}")
logger.debug(
f"[workers-modal] Cell highlighted, row: {cursor_row}, column: {event.coordinate.column}"
)
# Get the selected worker from the correct table
workers_list = None
@@ -363,14 +390,16 @@ class WorkersModal(ModalScreen):
logger.debug(f"[workers-modal] Cell highlighted in running table")
elif event.data_table == self.finished_table:
workers_list = self.finished_workers
logger.debug(f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}")
logger.debug(
f"[workers-modal] Cell highlighted in finished table, list size: {len(workers_list)}"
)
else:
return
# Get the worker at this row
if workers_list and 0 <= cursor_row < len(workers_list):
worker = workers_list[cursor_row]
worker_id = worker.get('id', '')
worker_id = worker.get("id", "")
if worker_id and worker_id != self.selected_worker_id:
logger.info(f"[workers-modal] Cell-highlighted worker: {worker_id}")
@@ -380,7 +409,9 @@ class WorkersModal(ModalScreen):
except Exception as e:
logger.debug(f"[workers-modal] Error handling cell highlight: {e}")
def _update_stdout_display(self, worker_id: str, worker: Optional[Dict[str, Any]] = None) -> None:
def _update_stdout_display(
self, worker_id: str, worker: Optional[Dict[str, Any]] = None
) -> None:
"""Update the stdout textarea with logs from the selected worker."""
try:
if not self.stdout_display:
@@ -400,7 +431,9 @@ class WorkersModal(ModalScreen):
logs_body = (stdout_text or "").strip()
sections.append("Logs:\n" + (logs_body if logs_body else "(no logs recorded)"))
combined_text = "\n\n".join(sections)
logger.debug(f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})")
logger.debug(
f"[workers-modal] Setting textarea to {len(combined_text)} chars (stdout_len={len(stdout_text or '')})"
)
self.stdout_display.text = combined_text
if len(combined_text) > 10:
try:
@@ -413,34 +446,34 @@ class WorkersModal(ModalScreen):
def _locate_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
for worker in self.running_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
if isinstance(worker, dict) and worker.get("id") == worker_id:
return worker
for worker in self.finished_workers or []:
if isinstance(worker, dict) and worker.get('id') == worker_id:
if isinstance(worker, dict) and worker.get("id") == worker_id:
return worker
return None
def _resolve_worker_stdout(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('stdout'):
return worker.get('stdout', '') or ''
manager = getattr(self.app_instance, 'worker_manager', None)
if worker and worker.get("stdout"):
return worker.get("stdout", "") or ""
manager = getattr(self.app_instance, "worker_manager", None)
if manager:
try:
return manager.get_stdout(worker_id) or ''
return manager.get_stdout(worker_id) or ""
except Exception as exc:
logger.debug(f"[workers-modal] Could not fetch stdout for {worker_id}: {exc}")
return ''
return ""
def _resolve_worker_pipe(self, worker_id: str, worker: Optional[Dict[str, Any]]) -> str:
if worker and worker.get('pipe'):
return str(worker.get('pipe'))
if worker and worker.get("pipe"):
return str(worker.get("pipe"))
record = self._fetch_worker_record(worker_id)
if record and record.get('pipe'):
return str(record.get('pipe'))
return ''
if record and record.get("pipe"):
return str(record.get("pipe"))
return ""
def _fetch_worker_record(self, worker_id: str) -> Optional[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
manager = getattr(self.app_instance, "worker_manager", None)
if not manager:
return None
try:
@@ -450,7 +483,7 @@ class WorkersModal(ModalScreen):
return None
def _get_worker_events(self, worker_id: str, limit: int = 250) -> List[Dict[str, Any]]:
manager = getattr(self.app_instance, 'worker_manager', None)
manager = getattr(self.app_instance, "worker_manager", None)
if not manager:
return []
try:
@@ -464,15 +497,15 @@ class WorkersModal(ModalScreen):
return ""
lines: List[str] = []
for event in events:
timestamp = self._format_event_timestamp(event.get('created_at'))
label = (event.get('event_type') or '').upper() or 'EVENT'
channel = (event.get('channel') or '').upper()
timestamp = self._format_event_timestamp(event.get("created_at"))
label = (event.get("event_type") or "").upper() or "EVENT"
channel = (event.get("channel") or "").upper()
if channel and channel not in label:
label = f"{label}/{channel}"
step = event.get('step') or ''
message = event.get('message') or ''
prefix = ''
if event.get('event_type') == 'step' and step:
step = event.get("step") or ""
message = event.get("message") or ""
prefix = ""
if event.get("event_type") == "step" and step:
prefix = f"{step} :: "
elif step and step not in message:
prefix = f"{step} :: "
@@ -493,7 +526,7 @@ class WorkersModal(ModalScreen):
return time_part[:8] if len(time_part) >= 8 else time_part
def _format_message_block(self, message: str) -> str:
clean = (message or '').strip()
clean = (message or "").strip()
if not clean:
return "(empty)"
lines = clean.splitlines()
@@ -504,10 +537,10 @@ class WorkersModal(ModalScreen):
return f"{head}\n{indented}"
def _summarize_pipe(self, pipe_value: Any, limit: int = 40) -> str:
text = str(pipe_value or '').strip()
text = str(pipe_value or "").strip()
if not text:
return "(none)"
return text if len(text) <= limit else text[: limit - 3] + '...'
return text if len(text) <= limit else text[: limit - 3] + "..."
def on_button_pressed(self, event: Button.Pressed) -> None:
"""Handle button presses."""
@@ -534,8 +567,8 @@ class WorkersModal(ModalScreen):
selected_row = self.running_table.cursor_row
if 0 <= selected_row < len(self.running_workers):
worker = self.running_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'stop_worker'):
worker_id = worker.get("id")
if self.app_instance and hasattr(self.app_instance, "stop_worker"):
self.app_instance.stop_worker(worker_id)
logger.info(f"[workers-modal] Stopped worker: {worker_id}")
self.refresh_workers()
@@ -544,7 +577,7 @@ class WorkersModal(ModalScreen):
elif button_id == "running-stop-all-btn":
# Stop all running workers
if self.app_instance and hasattr(self.app_instance, 'stop_all_workers'):
if self.app_instance and hasattr(self.app_instance, "stop_all_workers"):
self.app_instance.stop_all_workers()
logger.info("[workers-modal] Stopped all workers")
self.refresh_workers()
@@ -559,8 +592,10 @@ class WorkersModal(ModalScreen):
selected_row = self.finished_table.cursor_row
if 0 <= selected_row < len(self.finished_workers):
worker = self.finished_workers[selected_row]
worker_id = worker.get('id')
if self.app_instance and hasattr(self.app_instance, 'clear_finished_worker'):
worker_id = worker.get("id")
if self.app_instance and hasattr(
self.app_instance, "clear_finished_worker"
):
self.app_instance.clear_finished_worker(worker_id)
logger.info(f"[workers-modal] Cleared worker: {worker_id}")
self.refresh_workers()
@@ -569,7 +604,7 @@ class WorkersModal(ModalScreen):
elif button_id == "finished-clear-all-btn":
# Clear all finished workers
if self.app_instance and hasattr(self.app_instance, 'clear_all_finished_workers'):
if self.app_instance and hasattr(self.app_instance, "clear_all_finished_workers"):
self.app_instance.clear_all_finished_workers()
logger.info("[workers-modal] Cleared all finished workers")
self.refresh_workers()
+23 -5
View File
@@ -3,6 +3,7 @@
The TUI is a frontend to the CLI, so it must use the same pipeline executor
implementation as the CLI (`CLI.PipelineExecutor`).
"""
from __future__ import annotations
import contextlib
@@ -149,7 +150,10 @@ class PipelineRunner:
try:
with capture_rich_output(stdout=stdout_buffer, stderr=stderr_buffer):
with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(stderr_buffer):
with (
contextlib.redirect_stdout(stdout_buffer),
contextlib.redirect_stderr(stderr_buffer),
):
if on_log:
on_log("Executing pipeline via CLI executor...")
self._executor.execute_tokens(list(tokens))
@@ -166,7 +170,11 @@ class PipelineRunner:
# Pull the canonical state out of pipeline context.
table = None
try:
table = ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
table = (
ctx.get_display_table()
or ctx.get_current_stage_table()
or ctx.get_last_result_table()
)
except Exception:
table = None
@@ -259,7 +267,11 @@ class PipelineRunner:
try:
hist = list(getattr(ctx, "_RESULT_TABLE_HISTORY", []) or [])
snap["_RESULT_TABLE_HISTORY"] = [
(t, (items.copy() if isinstance(items, list) else list(items) if items else []), subj)
(
t,
(items.copy() if isinstance(items, list) else list(items) if items else []),
subj,
)
for (t, items, subj) in hist
if isinstance((t, items, subj), tuple)
]
@@ -269,7 +281,11 @@ class PipelineRunner:
try:
fwd = list(getattr(ctx, "_RESULT_TABLE_FORWARD", []) or [])
snap["_RESULT_TABLE_FORWARD"] = [
(t, (items.copy() if isinstance(items, list) else list(items) if items else []), subj)
(
t,
(items.copy() if isinstance(items, list) else list(items) if items else []),
subj,
)
for (t, items, subj) in fwd
if isinstance((t, items, subj), tuple)
]
@@ -278,7 +294,9 @@ class PipelineRunner:
try:
tail = list(getattr(ctx, "_PENDING_PIPELINE_TAIL", []) or [])
snap["_PENDING_PIPELINE_TAIL"] = [list(stage) for stage in tail if isinstance(stage, list)]
snap["_PENDING_PIPELINE_TAIL"] = [
list(stage) for stage in tail if isinstance(stage, list)
]
except Exception:
pass
+47 -9
View File
@@ -1,4 +1,5 @@
"""Modern Textual UI for driving Medeia-Macina pipelines."""
from __future__ import annotations
import json
@@ -13,7 +14,18 @@ from textual.binding import Binding
from textual.events import Key
from textual.containers import Container, Horizontal, Vertical
from textual.screen import ModalScreen
from textual.widgets import Button, DataTable, Footer, Header, Input, Label, OptionList, Select, Static, TextArea
from textual.widgets import (
Button,
DataTable,
Footer,
Header,
Input,
Label,
OptionList,
Select,
Static,
TextArea,
)
from textual.widgets.option_list import Option
BASE_DIR = Path(__file__).resolve().parent
@@ -174,7 +186,9 @@ class TagEditorPopup(ModalScreen[None]):
self._save_tags_background(to_add, to_del, desired)
@work(thread=True)
def _save_tags_background(self, to_add: List[str], to_del: List[str], desired: List[str]) -> None:
def _save_tags_background(
self, to_add: List[str], to_del: List[str], desired: List[str]
) -> None:
app = self.app # PipelineHubApp
try:
runner: PipelineRunner = getattr(app, "executor")
@@ -188,14 +202,26 @@ class TagEditorPopup(ModalScreen[None]):
del_cmd = f"@1 | delete-tag -store {store_tok}{query_chunk} {del_args}"
del_res = runner.run_pipeline(del_cmd, seeds=self._seeds, isolate=True)
if not getattr(del_res, "success", False):
failures.append(str(getattr(del_res, "error", "") or getattr(del_res, "stderr", "") or "delete-tag failed").strip())
failures.append(
str(
getattr(del_res, "error", "")
or getattr(del_res, "stderr", "")
or "delete-tag failed"
).strip()
)
if to_add:
add_args = " ".join(json.dumps(t) for t in to_add)
add_cmd = f"@1 | add-tag -store {store_tok}{query_chunk} {add_args}"
add_res = runner.run_pipeline(add_cmd, seeds=self._seeds, isolate=True)
if not getattr(add_res, "success", False):
failures.append(str(getattr(add_res, "error", "") or getattr(add_res, "stderr", "") or "add-tag failed").strip())
failures.append(
str(
getattr(add_res, "error", "")
or getattr(add_res, "stderr", "")
or "add-tag failed"
).strip()
)
if failures:
msg = failures[0]
@@ -401,7 +427,9 @@ class PipelineHubApp(App):
if not suggestion:
return
self.command_input.value = self._apply_suggestion_to_text(str(self.command_input.value or ""), suggestion)
self.command_input.value = self._apply_suggestion_to_text(
str(self.command_input.value or ""), suggestion
)
if self.suggestion_list:
self.suggestion_list.display = False
event.prevent_default()
@@ -496,7 +524,11 @@ class PipelineHubApp(App):
# Identify first stage command name for conservative auto-augmentation.
first_stage_cmd = ""
try:
first_stage_cmd = str(stages[0].split()[0]).replace("_", "-").strip().lower() if stages[0].split() else ""
first_stage_cmd = (
str(stages[0].split()[0]).replace("_", "-").strip().lower()
if stages[0].split()
else ""
)
except Exception:
first_stage_cmd = ""
@@ -720,7 +752,9 @@ class PipelineHubApp(App):
item: Any = None
# Prefer mapping displayed table row -> source item.
if self.current_result_table and 0 <= index < len(getattr(self.current_result_table, "rows", []) or []):
if self.current_result_table and 0 <= index < len(
getattr(self.current_result_table, "rows", []) or []
):
row = self.current_result_table.rows[index]
src_idx = getattr(row, "source_index", None)
if isinstance(src_idx, int) and 0 <= src_idx < len(self.result_items):
@@ -782,7 +816,9 @@ class PipelineHubApp(App):
return
text = ""
idx = int(getattr(self, "_selected_row_index", 0) or 0)
if self.current_result_table and 0 <= idx < len(getattr(self.current_result_table, "rows", []) or []):
if self.current_result_table and 0 <= idx < len(
getattr(self.current_result_table, "rows", []) or []
):
row = self.current_result_table.rows[idx]
lines = [f"{col.name}: {col.value}" for col in getattr(row, "columns", []) or []]
text = "\n".join(lines)
@@ -874,7 +910,9 @@ class PipelineHubApp(App):
worker_id = str(worker.get("worker_id") or worker.get("id") or "?")[:8]
worker_type = str(worker.get("worker_type") or worker.get("type") or "?")
status = str(worker.get("status") or worker.get("result") or "running")
details = worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
details = (
worker.get("current_step") or worker.get("description") or worker.get("pipe") or ""
)
self.worker_table.add_row(worker_id, worker_type, status, str(details)[:80])
+12 -11
View File
@@ -34,7 +34,7 @@ def _split_pipeline_stages(text: str) -> list[str]:
escaped = True
continue
if ch in ("\"", "'"):
if ch in ('"', "'"):
if quote is None:
quote = ch
elif quote == ch:
@@ -148,7 +148,9 @@ def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxError
parsed_q = parse_query(str(query_val))
q_hash = get_field(parsed_q, "hash") or get_field(parsed_q, "sha256")
q_store = get_field(parsed_q, "store")
has_store_hash_in_query = bool(str(q_hash or "").strip() and str(q_store or "").strip())
has_store_hash_in_query = bool(
str(q_hash or "").strip() and str(q_store or "").strip()
)
except Exception:
has_store_hash_in_query = False
@@ -157,7 +159,7 @@ def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxError
return SyntaxErrorDetail(
"Pipeline error: 'add-note' must come after 'add-file' when used with piped input. "
"Move 'add-note' after 'add-file', or call it with explicit targeting: "
"add-note -query \"store:<store> hash:<sha256> title:<title>,text:<text>\"."
'add-note -query "store:<store> hash:<sha256> title:<title>,text:<text>".'
)
return None
@@ -209,7 +211,7 @@ def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
last_pipe_outside_quotes = idx
if in_double:
return SyntaxErrorDetail('Syntax error: missing closing ' + '"' + '.', expected='"')
return SyntaxErrorDetail("Syntax error: missing closing " + '"' + ".", expected='"')
if in_single:
return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'")
@@ -242,7 +244,9 @@ def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
continue
if ch == "|" and not in_single and not in_double:
if not seen_nonspace_since_pipe:
return SyntaxErrorDetail("Syntax error: empty pipeline stage (use a command between '|').")
return SyntaxErrorDetail(
"Syntax error: empty pipeline stage (use a command between '|')."
)
seen_nonspace_since_pipe = False
continue
if not in_single and not in_double and not ch.isspace():
@@ -310,7 +314,9 @@ def parse_query(query: str) -> Dict[str, Any]:
return result
def get_field(
    parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None
) -> Optional[str]:
    """Look up a single field in a parsed query.

    The requested name is lower-cased before the lookup; a falsy name is
    looked up as the empty string.

    Args:
        parsed_query: Mapping whose ``"fields"`` entry holds the field dict.
        field_name: Name of the field to fetch.
        default: Value returned when the field (or ``"fields"``) is absent.

    Returns:
        The stored field value, or ``default``.
    """
    fields = parsed_query.get("fields", {})
    lookup_key = (field_name or "").lower()
    return fields.get(lookup_key, default)
@@ -320,8 +326,3 @@ def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free-text portion of a parsed query."""
return str(parsed_query.get("text", "") or "")
+7 -4
View File
@@ -11,7 +11,7 @@ REGISTRY: Dict[str, Cmdlet] = {}
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace('_', '-').lower().strip()
return str(name or "").replace("_", "-").lower().strip()
def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
@@ -35,8 +35,10 @@ def register(names: Iterable[str]):
@register(["add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
return register_callable(names, fn)
return _wrap
@@ -47,12 +49,11 @@ def get(cmd_name: str) -> Cmdlet | None:
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
# cmdlet self-register when instantiated via their __init__ method
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if not (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"
):
continue
@@ -67,12 +68,14 @@ for filename in os.listdir(cmdlet_dir):
_import_module(f".{mod_name}", __name__)
except Exception as e:
import sys
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlet
try:
from cmdnat import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
+170 -123
View File
@@ -1,5 +1,4 @@
"""
"""
""" """
from __future__ import annotations
@@ -18,6 +17,7 @@ import models
@dataclass
class CmdletArg:
"""Represents a single cmdlet argument with optional enum choices."""
name: str
"""Argument name, e.g., '-path' or 'location'"""
type: str = "string"
@@ -89,18 +89,18 @@ class CmdletArg:
storage_flags = SharedArgs.STORAGE.to_flags()
# Returns: ('--storage', '-storage', '-s')
"""
flags = [f'--{self.name}', f'-{self.name}'] # Both double-dash and single-dash variants
flags = [f"--{self.name}", f"-{self.name}"] # Both double-dash and single-dash variants
# Add short form if alias exists
if self.alias:
flags.append(f'-{self.alias}')
flags.append(f"-{self.alias}")
# Add negation forms for flag type
if self.type == 'flag':
flags.append(f'--no-{self.name}')
flags.append(f'-no{self.name}') # Single-dash negation variant
if self.type == "flag":
flags.append(f"--no-{self.name}")
flags.append(f"-no{self.name}") # Single-dash negation variant
if self.alias:
flags.append(f'-n{self.alias}')
flags.append(f"-n{self.alias}")
return tuple(flags)
@@ -140,6 +140,7 @@ def QueryArg(
# SHARED ARGUMENTS - Reusable argument definitions across cmdlet
# ============================================================================
class SharedArgs:
"""Registry of shared CmdletArg definitions used across multiple cmdlet.
@@ -208,6 +209,7 @@ class SharedArgs:
if config is None:
try:
from config import load_config
config = load_config()
except Exception:
return []
@@ -223,32 +225,32 @@ class SharedArgs:
type="enum",
choices=["hydrus", "0x0", "local"],
required=True,
description="Destination location"
description="Destination location",
)
DELETE_FLAG = CmdletArg(
"delete",
type="flag",
description="Delete the file and its .tag after successful operation."
description="Delete the file and its .tag after successful operation.",
)
# Metadata arguments
ARTIST = CmdletArg(
"artist",
type="string",
description="Filter by artist name (case-insensitive, partial match)."
description="Filter by artist name (case-insensitive, partial match).",
)
ALBUM = CmdletArg(
"album",
type="string",
description="Filter by album name (case-insensitive, partial match)."
description="Filter by album name (case-insensitive, partial match).",
)
TRACK = CmdletArg(
"track",
type="string",
description="Filter by track title (case-insensitive, partial match)."
description="Filter by track title (case-insensitive, partial match).",
)
# Library/Search arguments
@@ -256,53 +258,36 @@ class SharedArgs:
"library",
type="string",
choices=["hydrus", "local", "soulseek", "libgen", "ftp"],
description="Search library or source location."
description="Search library or source location.",
)
TIMEOUT = CmdletArg(
"timeout",
type="integer",
description="Search or operation timeout in seconds."
"timeout", type="integer", description="Search or operation timeout in seconds."
)
LIMIT = CmdletArg(
"limit",
type="integer",
description="Maximum number of results to return."
)
LIMIT = CmdletArg("limit", type="integer", description="Maximum number of results to return.")
# Path/File arguments
PATH = CmdletArg(
"path",
type="string",
description="File or directory path."
)
OUTPUT = CmdletArg(
"output",
type="string",
description="Output file path."
)
PATH = CmdletArg("path", type="string", description="File or directory path.")
OUTPUT = CmdletArg("output", type="string", description="Output file path.")
# Generic arguments
QUERY = CmdletArg(
"query",
type="string",
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>})."
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>}).",
)
REASON = CmdletArg(
"reason",
type="string",
description="Reason or explanation for the operation."
"reason", type="string", description="Reason or explanation for the operation."
)
ARCHIVE = CmdletArg(
"archive",
type="flag",
description="Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).",
alias="arch"
alias="arch",
)
@staticmethod
@@ -333,10 +318,10 @@ class SharedArgs:
path = SharedArgs.resolve_storage('hydrus') # Returns home/.hydrus/client_files
"""
storage_map = {
'local': Path.home() / "Videos",
'hydrus': Path.home() / ".hydrus" / "client_files",
'ftp': Path.home() / "FTP",
'matrix': Path.home() / "Matrix", # Placeholder, not used for upload path
"local": Path.home() / "Videos",
"hydrus": Path.home() / ".hydrus" / "client_files",
"ftp": Path.home() / "FTP",
"matrix": Path.home() / "Matrix", # Placeholder, not used for upload path
}
if storage_value is None:
@@ -395,6 +380,7 @@ class Cmdlet:
log(cmd.summary) # "Upload a media file"
log(cmd.args[0].name) # "location"
"""
name: str
""""""
summary: str
@@ -410,17 +396,15 @@ class Cmdlet:
# Execution function: func(result, args, config) -> int
exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)
def _collect_names(self) -> List[str]:
"""Collect primary name plus aliases, de-duplicated and normalized."""
names: List[str] = []
if self.name:
names.append(self.name)
for alias in (self.alias or []):
for alias in self.alias or []:
if alias:
names.append(alias)
for alias in (getattr(self, "aliases", None) or []):
for alias in getattr(self, "aliases", None) or []:
if alias:
names.append(alias)
@@ -439,7 +423,9 @@ class Cmdlet:
if not callable(self.exec):
return self
try:
from . import register_callable as _register_callable # Local import to avoid circular import cost
from . import (
register_callable as _register_callable,
) # Local import to avoid circular import cost
except Exception:
return self
@@ -588,7 +574,10 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
# However, some cmdlets may explicitly re-introduce a -hash flag.
if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map:
try:
log("Legacy flag -hash is no longer supported. Use: -query \"hash:<sha256>\"", file=sys.stderr)
log(
'Legacy flag -hash is no longer supported. Use: -query "hash:<sha256>"',
file=sys.stderr,
)
except Exception:
pass
i += 1
@@ -597,7 +586,10 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
# Check if this token is a known flagged argument
if token_lower in arg_spec_map:
canonical_name = arg_spec_map[token_lower]
spec = next((s for s in arg_specs if str(s.name).lstrip("-").lower() == canonical_name.lower()), None)
spec = next(
(s for s in arg_specs if str(s.name).lstrip("-").lower() == canonical_name.lower()),
None,
)
# Check if it's a flag type (which doesn't consume next value, just marks presence)
is_flag = spec and spec.type == "flag"
@@ -659,9 +651,14 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
if query_mapped_args and raw_query is not None:
try:
from cli_syntax import parse_query as _parse_query
parsed_query = _parse_query(str(raw_query))
fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
norm_fields = {str(k).strip().lower(): v for k, v in fields.items()} if isinstance(fields, dict) else {}
norm_fields = (
{str(k).strip().lower(): v for k, v in fields.items()}
if isinstance(fields, dict)
else {}
)
except Exception:
norm_fields = {}
@@ -763,7 +760,9 @@ def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
return hashes[0]
def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]:
def get_hash_for_operation(
override_hash: Optional[str], result: Any, field_name: str = "hash"
) -> Optional[str]:
"""Get normalized hash from override or result object, consolidating common pattern.
Eliminates repeated pattern: normalize_hash(override) if override else normalize_hash(get_field(result, ...))
@@ -778,7 +777,11 @@ def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name
"""
if override_hash:
return normalize_hash(override_hash)
hash_value = get_field(result, field_name) or getattr(result, field_name, None) or getattr(result, "hash", None)
hash_value = (
get_field(result, field_name)
or getattr(result, field_name, None)
or getattr(result, "hash", None)
)
return normalize_hash(hash_value)
@@ -808,6 +811,7 @@ def fetch_hydrus_metadata(
- error_code: 0 on success, 1 on any error (suitable for returning from cmdlet execute())
"""
from API import HydrusNetwork
hydrus_wrapper = HydrusNetwork
client = hydrus_client
@@ -816,6 +820,7 @@ def fetch_hydrus_metadata(
# Store specified: do not fall back to a global/default Hydrus client.
try:
from Store import Store
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
@@ -885,7 +890,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
return value
# For PipeObjects, also check the extra field
extra_val = getattr(obj, 'extra', None)
extra_val = getattr(obj, "extra", None)
if isinstance(extra_val, dict):
return extra_val.get(field, default)
@@ -957,7 +962,9 @@ def pipeline_item_local_path(item: Any) -> Optional[str]:
return text
def collect_relationship_labels(payload: Any, label_stack: List[str] | None = None, mapping: Dict[str, str] | None = None) -> Dict[str, str]:
def collect_relationship_labels(
payload: Any, label_stack: List[str] | None = None, mapping: Dict[str, str] | None = None
) -> Dict[str, str]:
"""Recursively extract hash-to-label mappings from nested relationship data.
Walks through nested dicts/lists looking for sha256-like strings (64 hex chars)
@@ -990,7 +997,7 @@ def collect_relationship_labels(payload: Any, label_stack: List[str] | None = No
for key, value in payload.items():
next_stack = label_stack
if isinstance(key, str) and key:
formatted = key.replace('_', ' ').strip()
formatted = key.replace("_", " ").strip()
next_stack = label_stack + [formatted]
collect_relationship_labels(value, next_stack, mapping)
elif isinstance(payload, (list, tuple, set)):
@@ -1021,22 +1028,22 @@ def parse_tag_arguments(arguments: Sequence[str]) -> List[str]:
"""
def _expand_pipe_namespace(text: str) -> List[str]:
parts = text.split('|')
parts = text.split("|")
expanded: List[str] = []
last_ns: Optional[str] = None
for part in parts:
segment = part.strip()
if not segment:
continue
if ':' in segment:
ns, val = segment.split(':', 1)
if ":" in segment:
ns, val = segment.split(":", 1)
ns = ns.strip()
val = val.strip()
last_ns = ns or last_ns
if last_ns and val:
expanded.append(f"{last_ns}:{val}")
elif ns or val:
expanded.append(f"{ns}:{val}".strip(':'))
expanded.append(f"{ns}:{val}".strip(":"))
else:
if last_ns:
expanded.append(f"{last_ns}:{segment}")
@@ -1046,7 +1053,7 @@ def parse_tag_arguments(arguments: Sequence[str]) -> List[str]:
tags: List[str] = []
for argument in arguments:
for token in argument.split(','):
for token in argument.split(","):
text = token.strip()
if not text:
continue
@@ -1056,8 +1063,8 @@ def parse_tag_arguments(arguments: Sequence[str]) -> List[str]:
candidate = entry.strip()
if not candidate:
continue
if ':' in candidate:
ns, val = candidate.split(':', 1)
if ":" in candidate:
ns, val = candidate.split(":", 1)
ns = ns.strip()
val = val.strip()
candidate = f"{ns}:{val}" if ns or val else ""
@@ -1159,7 +1166,9 @@ def _load_tag_groups() -> Dict[str, List[str]]:
elif isinstance(value, str):
normalised = _normalise_tag_group_entry(value)
if normalised:
members.extend(token.strip() for token in normalised.split(",") if token.strip())
members.extend(
token.strip() for token in normalised.split(",") if token.strip()
)
if members:
groups[name] = members
@@ -1270,6 +1279,7 @@ def apply_preferred_title(tags: List[str], preferred: Optional[str]) -> List[str
# PIPEOBJECT UTILITIES (for chainable cmdlet and multi-action pipelines)
# ============================================================================
def create_pipe_object_result(
source: str,
identifier: str,
@@ -1280,7 +1290,7 @@ def create_pipe_object_result(
is_temp: bool = False,
parent_hash: Optional[str] = None,
tag: Optional[List[str]] = None,
**extra: Any
**extra: Any,
) -> Dict[str, Any]:
"""Create a PipeObject-compatible result dict for pipeline chaining.
@@ -1303,27 +1313,27 @@ def create_pipe_object_result(
Dict with all PipeObject fields for emission
"""
result: Dict[str, Any] = {
'source': source,
'id': identifier,
'path': file_path,
'action': f'cmdlet:{cmdlet_name}', # Format: cmdlet:cmdlet_name
"source": source,
"id": identifier,
"path": file_path,
"action": f"cmdlet:{cmdlet_name}", # Format: cmdlet:cmdlet_name
}
if title:
result['title'] = title
result["title"] = title
if hash_value:
result['hash'] = hash_value
result["hash"] = hash_value
if is_temp:
result['is_temp'] = True
result["is_temp"] = True
if parent_hash:
result['parent_hash'] = parent_hash
result["parent_hash"] = parent_hash
if tag:
result['tag'] = tag
result["tag"] = tag
# Canonical store field: use source for compatibility
# NOTE(review): the guarded statements are only a truthiness test on `source`
# and a dict assignment, yet any exception raised here is silently discarded
# by `except Exception: pass`. Confirm whether this try/except is still needed;
# if it is, consider narrowing or logging it.
try:
if source:
result['store'] = source
result["store"] = source
except Exception:
pass
@@ -1342,7 +1352,7 @@ def mark_as_temp(pipe_object: Dict[str, Any]) -> Dict[str, Any]:
Returns:
Modified dict with is_temp=True
"""
pipe_object['is_temp'] = True
pipe_object["is_temp"] = True
return pipe_object
@@ -1356,7 +1366,7 @@ def set_parent_hash(pipe_object: Dict[str, Any], parent_hash: str) -> Dict[str,
Returns:
Modified dict with parent_hash set to the hash
"""
pipe_object['parent_hash'] = parent_hash
pipe_object["parent_hash"] = parent_hash
return pipe_object
@@ -1364,13 +1374,13 @@ def get_pipe_object_path(pipe_object: Any) -> Optional[str]:
"""Extract file path from PipeObject, dict, or pipeline-friendly object."""
if pipe_object is None:
return None
for attr in ('path', 'target'):
for attr in ("path", "target"):
if hasattr(pipe_object, attr):
value = getattr(pipe_object, attr)
if value:
return value
if isinstance(pipe_object, dict):
for key in ('path', 'target'):
for key in ("path", "target"):
value = pipe_object.get(key)
if value:
return value
@@ -1439,6 +1449,7 @@ def _print_live_safe_stderr(message: str) -> None:
cm = None
try:
import pipeline as _pipeline_ctx # type: ignore
suspend = getattr(_pipeline_ctx, "suspend_live_progress", None)
cm = suspend() if callable(suspend) else None
except Exception:
@@ -1624,6 +1635,7 @@ def _print_saved_output_panel(item: Any, final_path: Path) -> None:
# doesn't get overwritten/truncated by Live's cursor control.
try:
import pipeline as _pipeline_ctx # type: ignore
suspend = getattr(_pipeline_ctx, "suspend_live_progress", None)
cm = suspend() if callable(suspend) else None
except Exception:
@@ -1661,6 +1673,7 @@ def _print_saved_output_panel(item: Any, final_path: Path) -> None:
if not file_hash:
try:
from SYS.utils import sha256_file # type: ignore
file_hash = str(sha256_file(final_path) or "").strip()
except Exception:
file_hash = ""
@@ -1729,13 +1742,13 @@ def get_pipe_object_hash(pipe_object: Any) -> Optional[str]:
"""Extract file hash from PipeObject, dict, or pipeline-friendly object."""
if pipe_object is None:
return None
for attr in ('hash',):
for attr in ("hash",):
if hasattr(pipe_object, attr):
value = getattr(pipe_object, attr)
if value:
return value
if isinstance(pipe_object, dict):
for key in ('hash',):
for key in ("hash",):
value = pipe_object.get(key)
if value:
return value
@@ -1771,7 +1784,7 @@ def normalize_result_input(result: Any) -> List[Dict[str, Any]]:
for item in result:
if isinstance(item, dict):
output.append(item)
elif hasattr(item, 'to_dict'):
elif hasattr(item, "to_dict"):
output.append(item.to_dict())
else:
# Try as-is
@@ -1779,7 +1792,7 @@ def normalize_result_input(result: Any) -> List[Dict[str, Any]]:
return output
# PipeObject or other object with to_dict
if hasattr(result, 'to_dict'):
if hasattr(result, "to_dict"):
return [result.to_dict()]
# Fallback: wrap it
@@ -1807,11 +1820,11 @@ def filter_results_by_temp(results: List[Any], include_temp: bool = False) -> Li
is_temp = False
# Check PipeObject
if hasattr(result, 'is_temp'):
if hasattr(result, "is_temp"):
is_temp = result.is_temp
# Check dict
elif isinstance(result, dict):
is_temp = result.get('is_temp', False)
is_temp = result.get("is_temp", False)
if not is_temp:
filtered.append(result)
@@ -1844,7 +1857,9 @@ def merge_sequences(*sources: Optional[Iterable[Any]], case_sensitive: bool = Tr
return merged
def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]:
def collapse_namespace_tags(
tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last"
) -> list[str]:
"""Reduce tags so only one entry for a given namespace remains.
Keeps either the first or last occurrence (default last) while preserving overall order
@@ -1885,7 +1900,9 @@ def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefe
return result
def collapse_namespace_tag(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]:
def collapse_namespace_tag(
tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last"
) -> list[str]:
"""Singular alias for collapse_namespace_tags.
Some cmdlet prefer the singular name; keep behavior centralized.
@@ -1898,28 +1915,28 @@ def extract_tag_from_result(result: Any) -> list[str]:
if isinstance(result, models.PipeObject):
tag.extend(result.tag or [])
if isinstance(result.extra, dict):
extra_tag = result.extra.get('tag')
extra_tag = result.extra.get("tag")
if isinstance(extra_tag, list):
tag.extend(extra_tag)
elif isinstance(extra_tag, str):
tag.append(extra_tag)
elif hasattr(result, 'tag'):
elif hasattr(result, "tag"):
# Handle objects with tag attribute (e.g. SearchResult)
val = getattr(result, 'tag')
val = getattr(result, "tag")
if isinstance(val, (list, set, tuple)):
tag.extend(val)
elif isinstance(val, str):
tag.append(val)
if isinstance(result, dict):
raw_tag = result.get('tag')
raw_tag = result.get("tag")
if isinstance(raw_tag, list):
tag.extend(raw_tag)
elif isinstance(raw_tag, str):
tag.append(raw_tag)
extra = result.get('extra')
extra = result.get("extra")
if isinstance(extra, dict):
extra_tag = extra.get('tag')
extra_tag = extra.get("tag")
if isinstance(extra_tag, list):
tag.extend(extra_tag)
elif isinstance(extra_tag, str):
@@ -1931,10 +1948,10 @@ def extract_title_from_result(result: Any) -> Optional[str]:
"""Extract the title from a result dict or PipeObject."""
if isinstance(result, models.PipeObject):
return result.title
elif hasattr(result, 'title'):
return getattr(result, 'title')
elif hasattr(result, "title"):
return getattr(result, "title")
elif isinstance(result, dict):
return result.get('title')
return result.get("title")
return None
@@ -1950,26 +1967,26 @@ def extract_url_from_result(result: Any) -> list[str]:
url.append(candidate)
# NOTE(review): in both the single-quoted and the double-quoted version of
# these lines, each source (extra, metadata, attribute, dict, dict["extra"])
# is read through the very same "url" key/attribute two or three times in a
# row, and both operands of `hasattr(result, "url") or hasattr(result, "url")`
# are identical. The repeats add no new candidates; they look like leftovers
# from several differently named keys having been collapsed into "url" —
# confirm against history and reduce to one lookup per source.
# NOTE(review): `result.extra` is dereferenced without the isinstance(..., dict)
# guard that `result.metadata` gets a few lines below (extract_tag_from_result
# does guard `result.extra`) — confirm extra can never be None here.
# The collected list is passed through merge_sequences(..., case_sensitive=True)
# on the last line; presumably that removes the duplicates the repeated calls
# would otherwise produce — verify against merge_sequences.
if isinstance(result, models.PipeObject):
_extend(result.extra.get('url'))
_extend(result.extra.get('url')) # Also check singular url
_extend(result.extra.get("url"))
_extend(result.extra.get("url")) # Also check singular url
if isinstance(result.metadata, dict):
_extend(result.metadata.get('url'))
_extend(result.metadata.get('url'))
_extend(result.metadata.get('url'))
elif hasattr(result, 'url') or hasattr(result, 'url'):
_extend(result.metadata.get("url"))
_extend(result.metadata.get("url"))
_extend(result.metadata.get("url"))
elif hasattr(result, "url") or hasattr(result, "url"):
# Handle objects with url/url attribute
_extend(getattr(result, 'url', None))
_extend(getattr(result, 'url', None))
_extend(getattr(result, "url", None))
_extend(getattr(result, "url", None))
if isinstance(result, dict):
_extend(result.get('url'))
_extend(result.get('url'))
_extend(result.get('url'))
extra = result.get('extra')
_extend(result.get("url"))
_extend(result.get("url"))
_extend(result.get("url"))
extra = result.get("extra")
if isinstance(extra, dict):
_extend(extra.get('url'))
_extend(extra.get('url'))
_extend(extra.get('url'))
_extend(extra.get("url"))
_extend(extra.get("url"))
_extend(extra.get("url"))
return merge_sequences(url, case_sensitive=True)
@@ -1979,7 +1996,7 @@ def extract_relationships(result: Any) -> Optional[Dict[str, Any]]:
relationships = result.get_relationships()
return relationships or None
if isinstance(result, dict):
relationships = result.get('relationships')
relationships = result.get("relationships")
if isinstance(relationships, dict) and relationships:
return relationships
return None
@@ -1990,11 +2007,11 @@ def extract_duration(result: Any) -> Optional[float]:
if isinstance(result, models.PipeObject):
duration = result.duration
elif isinstance(result, dict):
duration = result.get('duration')
duration = result.get("duration")
if duration is None:
metadata = result.get('metadata')
metadata = result.get("metadata")
if isinstance(metadata, dict):
duration = metadata.get('duration')
duration = metadata.get("duration")
if duration is None:
return None
try:
@@ -2011,7 +2028,12 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
# Debug: Print ResultItem details if coming from search_file.py
try:
from SYS.logger import is_debug_enabled, debug
if is_debug_enabled() and hasattr(value, '__class__') and value.__class__.__name__ == 'ResultItem':
if (
is_debug_enabled()
and hasattr(value, "__class__")
and value.__class__.__name__ == "ResultItem"
):
debug("[ResultItem -> PipeObject conversion]")
debug(f" title={getattr(value, 'title', None)}")
debug(f" target={getattr(value, 'target', None)}")
@@ -2023,7 +2045,9 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
debug(f" duration_seconds={getattr(value, 'duration_seconds', None)}")
debug(f" relationships={getattr(value, 'relationships', None)}")
debug(f" url={getattr(value, 'url', None)}")
debug(f" full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}")
debug(
f" full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}"
)
except Exception:
pass
@@ -2031,8 +2055,20 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
return value
known_keys = {
"hash", "store", "tag", "title", "url", "source_url", "duration", "metadata",
"warnings", "path", "relationships", "is_temp", "action", "parent_hash",
"hash",
"store",
"tag",
"title",
"url",
"source_url",
"duration",
"metadata",
"warnings",
"path",
"relationships",
"is_temp",
"action",
"parent_hash",
}
# Convert common object-like results into a dict so we can preserve fields like
@@ -2041,7 +2077,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
# Priority:
# 1) explicit to_dict()
# 2) best-effort attribute extraction for known PipeObject-ish fields
if hasattr(value, 'to_dict'):
if hasattr(value, "to_dict"):
value = value.to_dict()
elif not isinstance(value, dict):
try:
@@ -2095,6 +2131,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
try:
from SYS.utils import sha256_file
from pathlib import Path
hash_val = sha256_file(Path(path_val))
except Exception:
hash_val = "unknown"
@@ -2108,6 +2145,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
if path_val:
try:
from pathlib import Path
title_val = Path(path_val).stem
except Exception:
pass
@@ -2116,6 +2154,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
# Extract URL: prefer direct url field, then url list
from metadata import normalize_urls
url_list = normalize_urls(value.get("url"))
url_val = url_list[0] if url_list else None
if len(url_list) > 1:
@@ -2138,12 +2177,17 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
# Only use target as path if it's not a URL (url should stay in url field)
if not path_val and "target" in value:
target = value["target"]
if target and not (isinstance(target, str) and (target.startswith("http://") or target.startswith("https://"))):
if target and not (
isinstance(target, str)
and (target.startswith("http://") or target.startswith("https://"))
):
path_val = target
# If the path value is actually a URL, move it to url_val and clear path_val
try:
if isinstance(path_val, str) and (path_val.startswith("http://") or path_val.startswith("https://")):
if isinstance(path_val, str) and (
path_val.startswith("http://") or path_val.startswith("https://")
):
# Prefer existing url_val if present, otherwise move path_val into url_val
if not url_val:
url_val = path_val
@@ -2158,7 +2202,10 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
pipe_obj = models.PipeObject(
hash=hash_val,
store=store_val,
provider=str(value.get("provider") or value.get("prov") or extra.get("provider") or "").strip() or None,
provider=str(
value.get("provider") or value.get("prov") or extra.get("provider") or ""
).strip()
or None,
tag=tag_val,
title=title_val,
url=url_val,
@@ -2196,6 +2243,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
try:
from SYS.utils import sha256_file
from pathlib import Path
path_obj = Path(path_val)
hash_val = sha256_file(path_obj)
# Extract title from filename (without extension)
@@ -2281,4 +2329,3 @@ def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[st
return True # url already existed
except Exception:
return False
+384 -136
View File
File diff suppressed because it is too large Load Diff
+28 -12
View File
@@ -27,11 +27,10 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note (-query \"title:<title>,text:<text>[,store:<store>][,hash:<sha256>]\") [ -store <store> | <piped> ]",
usage='add-note (-query "title:<title>,text:<text>[,store:<store>][,hash:<sha256>]") [ -store <store> | <piped> ]',
# NOTE(review): the alias list holds a single empty string rather than being
# empty — confirm whether an empty-string alias is intended or this should be [].
alias=[""],
arg=[
SharedArgs.STORE,
QueryArg(
"hash",
key="hash",
@@ -116,11 +115,13 @@ class Add_Note(Cmdlet):
# Fallback: best-effort regex.
# The title capture `[^,\s]+` stops at the first comma or whitespace character,
# so only the first word following `title:` is kept; the text capture `(.+)$`
# is anchored to the end of `normalized`. Both matches are case-insensitive.
# NOTE(review): a multi-word title is truncated to its first word by this
# fallback, and the text capture will also swallow any fields that follow
# `text:` — confirm both are acceptable for the best-effort path.
name_match = re.search(r"\btitle\s*:\s*([^,\s]+)", normalized, flags=re.IGNORECASE)
text_match = re.search(r"\btext\s*:\s*(.+)$", normalized, flags=re.IGNORECASE)
note_name = (name_match.group(1).strip() if name_match else "")
note_text = (text_match.group(1).strip() if text_match else "")
note_name = name_match.group(1).strip() if name_match else ""
note_text = text_match.group(1).strip() if text_match else ""
# Empty captures are reported as None rather than "".
return (note_name or None, note_text or None)
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -148,11 +149,17 @@ class Add_Note(Cmdlet):
hash_override = normalize_hash(parsed.get("hash"))
note_name, note_text = self._parse_note_query(str(parsed.get("query") or ""))
if not note_name or not note_text:
log("[add_note] Error: -query must include title:<title> and text:<text>", file=sys.stderr)
log(
"[add_note] Error: -query must include title:<title> and text:<text>",
file=sys.stderr,
)
return 1
if hash_override and not store_override:
log("[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>", file=sys.stderr)
log(
"[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>",
file=sys.stderr,
)
return 1
explicit_target = bool(hash_override and store_override)
@@ -166,7 +173,9 @@ class Add_Note(Cmdlet):
backend = store_registry[str(store_override)]
ok = bool(backend.set_note(str(hash_override), note_name, note_text, config=config))
if ok:
ctx.print_if_visible(f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr
)
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
return 1
@@ -180,7 +189,10 @@ class Add_Note(Cmdlet):
# Allow standalone use (no piped input) and enable piping the target forward.
results = [{"store": str(store_override), "hash": hash_override}]
else:
log("[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query \"store:<store> hash:<sha256> ...\")", file=sys.stderr)
log(
'[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query "store:<store> hash:<sha256> ...")',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -242,10 +254,15 @@ class Add_Note(Cmdlet):
try:
ok = bool(bulk_fn(list(ops), config=config))
# NOTE(review): the trailing `or True` makes this expression always True, so
# the value of `ok` never influences `wrote_any`, and the success message below
# is printed even when bulk_fn returns a falsy result. Confirm whether a falsy
# bulk result should instead fall through to the per-item fallback.
wrote_any = wrote_any or ok or True
ctx.print_if_visible(f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr
)
continue
except Exception as exc:
# Only an exception from the bulk call reaches the fallback path; it is logged
# as a warning and execution continues after this handler.
log(f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", file=sys.stderr)
log(
f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back",
file=sys.stderr,
)
# Fallback: per-item writes
for file_hash, name, text in ops:
@@ -260,4 +277,3 @@ class Add_Note(Cmdlet):
CMDLET = Add_Note()
+204 -57
View File
@@ -29,12 +29,28 @@ CMDLET = Cmdlet(
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg(
"path",
type="string",
description="Specify the local file path (if not piping a result).",
),
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
CmdletArg(
"-king",
type="string",
description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)",
),
CmdletArg(
"-alt",
type="string",
description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)",
),
CmdletArg(
"-type",
type="string",
description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')",
),
],
detail=[
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
@@ -54,7 +70,7 @@ def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
if len(normalized) == 64 and all(c in "0123456789abcdef" for c in normalized):
return normalized
return None
@@ -73,7 +89,7 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
return result
# Match patterns like hash(king)HASH or hash(type)<HASH>
pattern = r'hash\((\w+)\)<?([a-fA-F0-9]{64})>?'
pattern = r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?"
matches = re.findall(pattern, tag_value)
if matches:
@@ -118,7 +134,11 @@ def _apply_relationships_from_tags(
- Treat the first hash (king) as the king.
- Store directional alt -> king relationships (no reverse edge).
"""
rel_tags = [t for t in relationship_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:")]
rel_tags = [
t
for t in relationship_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if not rel_tags:
return 0
@@ -166,7 +186,9 @@ def _apply_relationships_from_tags(
# For local DB we treat all non-king hashes as alts.
alt_hashes: list[str] = []
for bucket in ("alt", "related"):
alt_hashes.extend([h for h in (rels.get(bucket) or []) if isinstance(h, str)])
alt_hashes.extend(
[h for h in (rels.get(bucket) or []) if isinstance(h, str)]
)
for alt in alt_hashes:
alt_norm = _normalise_hash_hex(alt)
@@ -191,7 +213,7 @@ def _parse_at_selection(token: str) -> Optional[list[int]]:
if not isinstance(token, str):
return None
t = token.strip()
if not t.startswith('@'):
if not t.startswith("@"):
return None
if t == "@*":
return [] # special sentinel: caller interprets as "all"
@@ -300,20 +322,21 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
return normalized
# Try to resolve as @ selection from pipeline context
if king_arg.startswith('@'):
if king_arg.startswith("@"):
selected = _resolve_items_from_at(king_arg)
if not selected:
log(f"Cannot resolve {king_arg}: no selection context", file=sys.stderr)
return None
if len(selected) != 1:
log(f"{king_arg} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_arg} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return None
item = selected[0]
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash")
)
if item_hash:
@@ -327,7 +350,12 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
return None
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -356,11 +384,32 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
subj_hashes: list[str] = []
subj_paths: list[str] = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v]
subj_hashes = [
norm(v)
for v in [
subject.get("hydrus_hash"),
subject.get("hash"),
subject.get("hash_hex"),
subject.get("file_hash"),
]
if v
]
subj_paths = [
norm(v)
for v in [subject.get("file_path"), subject.get("path"), subject.get("target")]
if v
]
else:
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)]
subj_hashes = [
norm(getattr(subject, f, None))
for f in ("hydrus_hash", "hash", "hash_hex", "file_hash")
if getattr(subject, f, None)
]
subj_paths = [
norm(getattr(subject, f, None))
for f in ("file_path", "path", "target")
if getattr(subject, f, None)
]
is_match = False
if target_hashes and any(h in subj_hashes for h in target_hashes):
@@ -420,7 +469,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if alt_arg:
alt_text = str(alt_arg).strip()
resolved_alt_items: list[Any] = []
if alt_text.startswith('@'):
if alt_text.startswith("@"):
selected = _resolve_items_from_at(alt_text)
if not selected:
log(f"Failed to resolve -alt {alt_text}: no selection context", file=sys.stderr)
@@ -431,7 +480,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
parts = [p.strip() for p in alt_text.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -alt value (expected @ selection or 64-hex sha256 hash list)", file=sys.stderr)
log(
"Invalid -alt value (expected @ selection or 64-hex sha256 hash list)",
file=sys.stderr,
)
return 1
if not override_store:
log("-store is required when using -alt with a raw hash list", file=sys.stderr)
@@ -459,13 +511,16 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
king_store: Optional[str] = None
if king_arg:
king_text = str(king_arg).strip()
if king_text.startswith('@'):
if king_text.startswith("@"):
selected = _resolve_items_from_at(king_text)
if not selected:
log(f"Cannot resolve {king_text}: no selection context", file=sys.stderr)
return 1
if len(selected) != 1:
log(f"{king_text} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_text} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return 1
king_hash, king_store = _extract_hash_and_store(selected[0])
if not king_hash:
@@ -490,18 +545,27 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store", file=sys.stderr)
log(
"Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store",
file=sys.stderr,
)
return 1
# Enforce same-store relationships when store context is available.
if king_store and store_name and str(king_store) != str(store_name):
log(f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'",
file=sys.stderr,
)
return 1
if store_name:
for item in items_to_process:
s = get_field(item, "store")
if s and str(s) != str(store_name):
log(f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'",
file=sys.stderr,
)
return 1
# Resolve backend for store/hash operations
@@ -564,7 +628,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
sidecar_path = find_sidecar(arg_path)
if sidecar_path is not None and sidecar_path.exists():
_, tags, _ = read_sidecar(sidecar_path)
relationship_tags = [t for t in (tags or []) if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t
for t in (tags or [])
if isinstance(t, str) and t.lower().startswith("relationship:")
]
if relationship_tags:
code = _apply_relationships_from_tags(
relationship_tags,
@@ -587,7 +655,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
else:
tags_val = getattr(item, "tag", None)
if isinstance(tags_val, list):
rel_tags_from_pipe.extend([t for t in tags_val if isinstance(t, str) and t.lower().startswith("relationship:")])
rel_tags_from_pipe.extend(
[
t
for t in tags_val
if isinstance(t, str) and t.lower().startswith("relationship:")
]
)
elif isinstance(tags_val, str) and tags_val.lower().startswith("relationship:"):
rel_tags_from_pipe.append(tags_val)
@@ -613,7 +687,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
@@ -622,19 +699,26 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
# directional alt -> king by default for local DB
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, first_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, first_hash, str(rel_type), bidirectional=bidirectional
)
return 0
# Mode 2: explicit king
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, king_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, king_hash, str(rel_type), bidirectional=bidirectional
)
return 0
except Exception as exc:
log(f"Failed to set store relationships: {exc}", file=sys.stderr)
@@ -648,7 +732,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Verify hashes exist in this Hydrus backend to prevent cross-store edges.
if king_hash and (not _hydrus_hash_exists(hydrus_client, king_hash)):
log(f"Cross-store relationship blocked: king hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
# Mode 1: first is king
@@ -657,19 +744,28 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
if not first_hash:
first_hash = h
if not _hydrus_hash_exists(hydrus_client, first_hash):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
continue
if h != first_hash:
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, first_hash, str(rel_type))
return 0
@@ -678,12 +774,18 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, king_hash, str(rel_type))
return 0
@@ -705,6 +807,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# (kept for -path sidecar workflows; store/hash mode above is preferred)
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
use_local_storage = bool(local_storage_path)
local_storage_root: Optional[Path] = None
@@ -743,13 +846,22 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
king_file_path = opt.db.search_hash(normalized_king)
if not king_file_path:
log(f"King hash not found in local DB: {king_hash}", file=sys.stderr)
log(
f"King hash not found in local DB: {king_hash}", file=sys.stderr
)
return 1
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, king_file_path, rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
opt.db.set_relationship(
file_path_obj, king_file_path, rel_type, bidirectional=bidirectional
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_file_path), config
)
else:
# Original behavior: first becomes king, rest become alts
try:
@@ -760,16 +872,29 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
log(
f"Established king file: {file_path_obj.name}",
file=sys.stderr,
)
continue
except Exception:
pass
if king_path and king_path != str(file_path_obj):
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, Path(king_path), rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
opt.db.set_relationship(
file_path_obj,
Path(king_path),
rel_type,
bidirectional=bidirectional,
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_path), config
)
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
@@ -788,9 +913,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
str(file_path_from_result) if file_path_from_result is not None else None,
king_hash,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, king_hash, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
@@ -816,9 +946,18 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
(
str(file_path_from_result)
if file_path_from_result is not None
else None
),
existing_king,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, existing_king, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
@@ -828,11 +967,16 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
log(
"Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship",
file=sys.stderr,
)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
media_path = (
arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
)
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
@@ -869,7 +1013,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")
]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
@@ -913,7 +1059,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
file=sys.stderr,
)
success_count += 1
except Exception as exc:
@@ -925,7 +1071,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
log(
f"Successfully set {success_count} relationship(s) for {media_path.name}",
file=sys.stderr,
)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
@@ -940,5 +1089,3 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET.exec = _run
CMDLET.alias = ["add-rel"]
CMDLET.register()
+158 -48
View File
@@ -118,14 +118,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
name_lower = raw_name.lower()
is_last = idx == (len(matches) - 1)
if is_last:
parts.append(fr"(?P<{raw_name}>.+)")
parts.append(rf"(?P<{raw_name}>.+)")
else:
# Heuristic: common numeric fields should capture full digit runs.
# This avoids ambiguous splits like track='2', title='3 ...'.
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
parts.append(fr"(?P<{raw_name}>\d+)")
if name_lower in {
"disk",
"disc",
"cd",
"track",
"trk",
"episode",
"ep",
"season",
"year",
}:
parts.append(rf"(?P<{raw_name}>\d+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
parts.append(rf"(?P<{raw_name}>.+?)")
last_end = m.end()
@@ -159,7 +169,9 @@ def _extract_tags_from_title(title_text: str, template: str) -> List[str]:
return out
def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[str]] = None) -> List[str]:
def _get_title_candidates_for_extraction(
res: Any, existing_tags: Optional[List[str]] = None
) -> List[str]:
"""Return a list of possible title strings in priority order."""
candidates: List[str] = []
@@ -197,7 +209,9 @@ def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[
return candidates
def _extract_tags_from_title_candidates(candidates: List[str], template: str) -> tuple[List[str], Optional[str]]:
def _extract_tags_from_title_candidates(
candidates: List[str], template: str
) -> tuple[List[str], Optional[str]]:
"""Try candidates in order; return (tags, matched_candidate)."""
for c in candidates:
@@ -207,7 +221,9 @@ def _extract_tags_from_title_candidates(candidates: List[str], template: str) ->
return [], None
def _try_compile_extract_template(template: Optional[str]) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
def _try_compile_extract_template(
template: Optional[str],
) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
"""Compile template for debug; return (pattern, error_message)."""
if template is None:
return None, None
@@ -387,7 +403,13 @@ def _refresh_result_table_title(
pass
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_tag_view(
res: Any,
target_hash: Optional[str],
store_name: Optional[str],
target_path: Optional[str],
config: Dict[str, Any],
) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -421,7 +443,6 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
pass
class Add_Tag(Cmdlet):
"""Class-based add-tag cmdlet with Cmdlet metadata inheritance."""
@@ -429,16 +450,42 @@ class Add_Tag(Cmdlet):
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-query \"hash:<sha256>\"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
usage='add-tag -store <store> [-query "hash:<sha256>"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]',
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
CmdletArg(
"tag",
type="string",
required=False,
description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.",
variadic=True,
),
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("-extract", type="string", description="Extract tags from the item's title using a simple template with (field) placeholders. Example: -extract \"(artist) - (album) - (disk)-(track) (title)\" will add artist:, album:, disk:, track:, title: tags."),
CmdletArg("--extract-debug", type="flag", description="Print debug info for -extract matching (matched title source and extracted tags)."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
CmdletArg(
"-extract",
type="string",
description='Extract tags from the item\'s title using a simple template with (field) placeholders. Example: -extract "(artist) - (album) - (disk)-(track) (title)" will add artist:, album:, disk:, track:, title: tags.',
),
CmdletArg(
"--extract-debug",
type="flag",
description="Print debug info for -extract matching (matched title source and extracted tags).",
),
CmdletArg(
"-duplicate",
type="string",
description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)",
),
CmdletArg(
"-list",
type="string",
description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult).",
),
CmdletArg(
"--all",
type="flag",
description="Include temporary files in tagging (by default, only tag non-temporary files).",
),
],
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
@@ -446,7 +493,7 @@ class Add_Tag(Cmdlet):
"- If -query is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
'- tag can also reference lists with curly braces: add-tag {philosophy} "other:tag"',
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
@@ -484,7 +531,9 @@ class Add_Tag(Cmdlet):
# @N | download-media | add-tag ... | add-file ...
store_override = parsed.get("store")
stage_ctx = ctx.get_stage_context()
has_downstream = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
has_downstream = bool(
stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)
)
include_temp = bool(parsed.get("all", False))
if has_downstream and not include_temp and not store_override:
@@ -498,7 +547,10 @@ class Add_Tag(Cmdlet):
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
log(
"No valid files to tag (all results were temporary; use --all to include temporary files)",
file=sys.stderr,
)
return 1
# Get tag from arguments (or fallback to pipeline payload)
@@ -536,7 +588,7 @@ class Add_Tag(Cmdlet):
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
for l in list_arg.split(","):
l = l.strip()
if l:
raw_tag.append(f"{{{l}}}")
@@ -590,46 +642,70 @@ class Add_Tag(Cmdlet):
# treat add-tag as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
local_mode_requested = (
(not store_name_str)
or (store_name_str.upper() == "PATH")
or (store_name_str.lower() == "local")
)
is_known_backend = bool(store_name_str) and store_registry.is_available(
store_name_str
)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tag_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
existing_lower = {
t.lower() for t in existing_tag_list if isinstance(t, str)
}
item_tag_to_add = list(tag_to_add)
if extract_template:
candidates = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted, matched = _extract_tags_from_title_candidates(candidates, extract_template)
candidates = _get_title_candidates_for_extraction(
res, existing_tag_list
)
extracted, matched = _extract_tags_from_title_candidates(
candidates, extract_template
)
if extracted:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched!r} -> {extracted}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched!r} -> {extracted}",
file=sys.stderr,
)
for new_tag in extracted:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview = (
extract_debug_rx.pattern
if extract_debug_rx
else "<uncompiled>"
)
cand_preview = "; ".join([repr(c) for c in candidates[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
item_tag_to_add = collapse_namespace_tag(
item_tag_to_add, "title", prefer="last"
)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -654,7 +730,10 @@ class Add_Tag(Cmdlet):
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
if (
t.lower().startswith(ns_prefix)
and t.lower() != new_tag.lower()
):
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
@@ -664,7 +743,9 @@ class Add_Tag(Cmdlet):
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tag_list = [t for t in existing_tag_list if t not in removed_namespace_tag]
updated_tag_list = [
t for t in existing_tag_list if t not in removed_namespace_tag
]
updated_tag_list.extend(actual_tag_to_add)
_set_item_tags(res, updated_tag_list)
@@ -672,7 +753,9 @@ class Add_Tag(Cmdlet):
_apply_title_to_result(res, final_title)
total_added += len(actual_tag_to_add)
total_modified += 1 if (removed_namespace_tag or actual_tag_to_add) else 0
total_modified += (
1 if (removed_namespace_tag or actual_tag_to_add) else 0
)
ctx.emit(res)
continue
@@ -680,14 +763,22 @@ class Add_Tag(Cmdlet):
pass
if local_mode_requested:
log("[add_tag] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
log(
"[add_tag] Error: Missing usable local path for tagging (or provide -store)",
file=sys.stderr,
)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
log(
f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}",
file=sys.stderr,
)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
resolved_hash = (
normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
@@ -700,7 +791,10 @@ class Add_Tag(Cmdlet):
resolved_hash = None
if not resolved_hash:
log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
log(
"[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping",
file=sys.stderr,
)
ctx.emit(res)
continue
@@ -724,34 +818,44 @@ class Add_Tag(Cmdlet):
if extract_template:
candidates2 = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted2, matched2 = _extract_tags_from_title_candidates(candidates2, extract_template)
extracted2, matched2 = _extract_tags_from_title_candidates(
candidates2, extract_template
)
if extracted2:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched2!r} -> {extracted2}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched2!r} -> {extracted2}",
file=sys.stderr,
)
for new_tag in extracted2:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview2 = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview2 = (
extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
)
cand_preview2 = "; ".join([repr(c) for c in candidates2[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tag to new namespaces)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -812,9 +916,15 @@ class Add_Tag(Cmdlet):
)
if extract_template and extract_matched_items == 0:
log(f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)", file=sys.stderr)
log(
f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)",
file=sys.stderr,
)
elif extract_template and extract_no_match_items > 0 and extract_debug:
log(f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}", file=sys.stderr)
log(
f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}",
file=sys.stderr,
)
return 0
+20 -12
View File
@@ -53,7 +53,9 @@ class Add_Url(sh.Cmdlet):
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -61,13 +63,15 @@ class Add_Url(sh.Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (sh.get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (sh.get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
sh.get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
@@ -85,7 +89,7 @@ class Add_Url(sh.Cmdlet):
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
urls = [u.strip() for u in str(url_arg).split(",") if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
@@ -137,23 +141,30 @@ class Add_Url(sh.Cmdlet):
raw_hash = query_hash or sh.get_field(item, "hash")
raw_store = store_override or sh.get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr
)
continue
normalized = sh.normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[add-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
# Validate backend exists (skip PATH/unknown).
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[add-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[add-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -170,7 +181,7 @@ class Add_Url(sh.Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
@@ -216,6 +227,3 @@ class Add_Url(sh.Cmdlet):
CMDLET = Add_Url()
+35 -9
View File
@@ -108,7 +108,9 @@ def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: P
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
file_hash = _extract_sha256_hex(item) or (
_extract_hash_from_hydrus_file_url(url) if url else ""
)
if not file_hash:
return None
@@ -117,7 +119,9 @@ def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: P
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
is_hydrus_url = (parsed.path or "").endswith(
"/get_files/file"
) and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
@@ -179,12 +183,18 @@ def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: P
return None
def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
def _resolve_existing_or_fetch_path(
item: Any, config: Dict[str, Any]
) -> tuple[Path | None, Path | None]:
"""Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
# 1) Direct local path
try:
po = coerce_to_pipe_object(item, None)
raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
raw_path = (
getattr(po, "path", None)
or getattr(po, "target", None)
or sh.get_pipe_object_path(item)
)
if raw_path:
p = Path(str(raw_path)).expanduser()
if p.exists():
@@ -216,9 +226,17 @@ def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = (
Path(str(tmp_base)).expanduser()
if tmp_base
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
downloaded = _maybe_download_hydrus_item(
{"hash": file_hash, "store": store_name, "url": src.strip()},
config,
out_dir,
)
if downloaded is not None:
return downloaded, downloaded
except Exception:
@@ -229,7 +247,11 @@ def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = (
Path(str(tmp_base)).expanduser()
if tmp_base
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item(item, config, out_dir)
if downloaded is not None:
@@ -350,7 +372,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
with open(out_path, "wb") as out_handle:
cctx = zstd.ZstdCompressor(level=level)
with cctx.stream_writer(out_handle) as compressor:
with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
with tarfile.open(
fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT
) as tf:
seen_names: Set[str] = set()
for p in paths:
arcname = _unique_arcname(p.name, seen_names)
@@ -412,7 +436,9 @@ CMDLET = Cmdlet(
summary="Archive piped files into a single .tar.zst.",
usage="@N | archive-file [-level <1-22>] [-path <path>]",
arg=[
CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
CmdletArg(
"-level", type="integer", description="Zstandard compression level (default: 11)."
),
SharedArgs.PATH,
],
detail=[
+62 -19
View File
@@ -1,4 +1,5 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence
@@ -23,12 +24,16 @@ class Delete_File(sh.Cmdlet):
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-query \"hash:<sha256>\"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
usage='delete-file [-query "hash:<sha256>"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]',
alias=["del-file"],
arg=[
sh.SharedArgs.QUERY,
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg(
"conserve", description="Choose which copy to keep: 'local' or 'hydrus'."
),
sh.CmdletArg(
"lib-root", description="Path to local library root for database cleanup."
),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
@@ -62,7 +67,11 @@ class Delete_File(sh.Cmdlet):
title_val = item.get("title") or item.get("name")
else:
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
target = (
sh.get_field(item, "target")
or sh.get_field(item, "file_path")
or sh.get_field(item, "path")
)
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
def _get_ext_from_item() -> str:
@@ -133,7 +142,11 @@ class Delete_File(sh.Cmdlet):
is_hydrus_store = False
# Backwards-compatible fallback heuristic (older items might only carry a name).
if (not is_hydrus_store) and bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"}):
if (
(not is_hydrus_store)
and bool(store_lower)
and ("hydrus" in store_lower or store_lower in {"home", "work"})
):
is_hydrus_store = True
store_label = str(store) if store else "default"
hydrus_prefix = f"[hydrusnetwork:{store_label}]"
@@ -142,10 +155,16 @@ class Delete_File(sh.Cmdlet):
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
hash_hex = (
sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
local_target = (
isinstance(target, str)
and target.strip()
and not str(target).lower().startswith(("http://", "https://"))
)
deleted_rows: List[Dict[str, Any]] = []
# If this item references a configured non-Hydrus store backend, prefer deleting
@@ -169,11 +188,15 @@ class Delete_File(sh.Cmdlet):
try:
if hash_candidate and hasattr(backend, "get_file"):
candidate_path = backend.get_file(hash_candidate)
resolved_path = candidate_path if isinstance(candidate_path, Path) else None
resolved_path = (
candidate_path if isinstance(candidate_path, Path) else None
)
except Exception:
resolved_path = None
identifier = hash_candidate or (str(target).strip() if isinstance(target, str) else "")
identifier = hash_candidate or (
str(target).strip() if isinstance(target, str) else ""
)
if identifier:
deleter = getattr(backend, "delete_file", None)
if callable(deleter) and bool(deleter(identifier)):
@@ -181,18 +204,27 @@ class Delete_File(sh.Cmdlet):
size_bytes: int | None = None
try:
if resolved_path is not None and isinstance(resolved_path, Path) and resolved_path.exists():
if (
resolved_path is not None
and isinstance(resolved_path, Path)
and resolved_path.exists()
):
size_bytes = int(resolved_path.stat().st_size)
except Exception:
size_bytes = None
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else (resolved_path.name if resolved_path else identifier),
"title": (
str(title_val).strip()
if title_val
else (resolved_path.name if resolved_path else identifier)
),
"store": store_label,
"hash": hash_candidate or (hash_hex or ""),
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
"ext": _get_ext_from_item()
or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
}
)
@@ -312,7 +344,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
else:
@@ -328,7 +363,10 @@ class Delete_File(sh.Cmdlet):
client = candidate
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
log(
f"Hydrus client unavailable for store '{store}': {exc}",
file=sys.stderr,
)
return False
if client is None:
if not local_deleted:
@@ -365,7 +403,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
except Exception:
@@ -468,7 +509,9 @@ class Delete_File(sh.Cmdlet):
success_count = 0
deleted_rows: List[Dict[str, Any]] = []
for item in items:
rows = self._process_single_item(item, override_hash, conserve, lib_root, reason, config)
rows = self._process_single_item(
item, override_hash, conserve, lib_root, reason, config
)
if rows:
success_count += 1
deleted_rows.extend(rows)
@@ -481,7 +524,9 @@ class Delete_File(sh.Cmdlet):
result_row.add_column("Title", row.get("title", ""))
result_row.add_column("Store", row.get("store", ""))
result_row.add_column("Hash", row.get("hash", ""))
result_row.add_column("Size", _format_size(row.get("size_bytes"), integer_only=False))
result_row.add_column(
"Size", _format_size(row.get("size_bytes"), integer_only=False)
)
result_row.add_column("Ext", row.get("ext", ""))
# Display-only: print directly and do not affect selection/history.
@@ -504,5 +549,3 @@ class Delete_File(sh.Cmdlet):
# Instantiate and register the cmdlet
Delete_File()
+26 -8
View File
@@ -26,12 +26,14 @@ class Delete_Note(Cmdlet):
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-query \"hash:<sha256>\"] <name>",
usage='delete-note -store <store> [-query "hash:<sha256>"] <name>',
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
CmdletArg(
"name", type="string", required=True, description="The note name/key to delete."
),
],
detail=[
"- Deletes the named note from the selected store backend.",
@@ -44,7 +46,9 @@ class Delete_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +80,10 @@ class Delete_Note(Cmdlet):
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
if not note_name_override and not inferred_note_name:
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
log(
"[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)",
file=sys.stderr,
)
return 1
results = normalize_result_input(result)
@@ -84,7 +91,10 @@ class Delete_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[delete_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -96,9 +106,14 @@ class Delete_Note(Cmdlet):
continue
# Resolve which note name to delete for this item.
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
note_name = (
note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
)
if not note_name:
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
log(
"[delete_note] Error: Missing note name (pass <name> or pipe a note row)",
file=sys.stderr,
)
return 1
store_name = str(store_override or res.get("store") or "").strip()
@@ -106,7 +121,10 @@ class Delete_Note(Cmdlet):
raw_path = res.get("path")
if not store_name:
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
log(
"[delete_note] Error: Missing -store and item has no store field",
file=sys.stderr,
)
return 1
resolved_hash = self._resolve_hash(
+68 -15
View File
@@ -30,7 +30,9 @@ def _extract_hash(item: Any) -> Optional[str]:
return normalize_hash(str(h)) if h else None
def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: Dict[str, Any]) -> None:
def _upsert_relationships(
db: API_folder_store, file_hash: str, relationships: Dict[str, Any]
) -> None:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
@@ -48,7 +50,9 @@ def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: D
)
def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str) -> None:
def _remove_reverse_link(
db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str
) -> None:
meta = db.get_metadata(dst_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
@@ -78,7 +82,12 @@ def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str,
_upsert_relationships(db, dst_hash, rels)
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -179,7 +188,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected in pipeline; use -store to choose one", file=sys.stderr)
log(
"Multiple stores detected in pipeline; use -store to choose one",
file=sys.stderr,
)
return 1
deleted_count = 0
@@ -208,7 +220,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Enforce same-store when items carry store info
item_store = get_field(single_result, "store")
if item_store and str(item_store) != str(store_name):
log(f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
file_hash = _extract_hash(single_result)
@@ -225,7 +240,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -query \"hash:<sha256>\" or ensure pipeline includes hash)", file=sys.stderr)
log(
'Could not extract file hash for deletion (use -query "hash:<sha256>" or ensure pipeline includes hash)',
file=sys.stderr,
)
return 1
meta = db.get_metadata(file_hash) or {}
@@ -241,7 +259,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
# delete one type (case-insensitive key match)
@@ -257,13 +280,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
@@ -271,7 +302,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
_refresh_relationship_view_if_current(file_hash, None, None, config)
deleted_count += 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
log(
f"Successfully deleted relationships from {deleted_count} file(s)",
file=sys.stderr,
)
return 0
except Exception as exc:
log(f"Error deleting store relationships: {exc}", file=sys.stderr)
@@ -330,7 +364,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
key_to_delete: Optional[str] = None
@@ -345,18 +384,28 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
conn.commit()
_refresh_relationship_view_if_current(file_hash, str(file_path_obj), None, config)
_refresh_relationship_view_if_current(
file_hash, str(file_path_obj), None, config
)
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
@@ -379,7 +428,11 @@ CMDLET = Cmdlet(
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
CmdletArg(
"type",
type="string",
description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types.",
),
],
detail=[
"- Delete all relationships: pipe files | delete-relationship --all",
+65 -23
View File
@@ -20,7 +20,9 @@ from SYS.logger import debug, log
from Store import Store
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
def _refresh_tag_view_if_current(
file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]
) -> None:
"""If the current subject matches the target, refresh tags via get-tag."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -53,7 +55,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v]
else:
subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)]
subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)]
subj_paths = [
norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)
]
is_match = False
if target_hash and target_hash in subj_hashes:
@@ -76,11 +80,15 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-query \"hash:<sha256>\"] <tag>[,<tag>...]",
usage='delete-tag -store <store> [-query "hash:<sha256>"] <tag>[,<tag>...]',
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
CmdletArg(
"<tag>[,<tag>...]",
required=True,
description="One or more tags to remove. Comma- or space-separated.",
),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -query override.",
@@ -88,6 +96,7 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
@@ -99,17 +108,23 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return False
# TagItem (direct) or PipeObject/dict emitted from get-tag table rows.
try:
if hasattr(obj, '__class__') and obj.__class__.__name__ == 'TagItem' and hasattr(obj, 'tag_name'):
if (
hasattr(obj, "__class__")
and obj.__class__.__name__ == "TagItem"
and hasattr(obj, "tag_name")
):
return True
except Exception:
pass
try:
return bool(get_field(obj, 'tag_name'))
return bool(get_field(obj, "tag_name"))
except Exception:
return False
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
has_piped_tag_list = (
isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
)
# Parse -query/-store overrides and collect remaining args.
override_query: str | None = None
@@ -151,8 +166,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
grouped_table = ""
grouped_tags = get_field(result, "tag") if result is not None else None
tags_arg = parse_tag_arguments(rest)
if grouped_table == "tag.selection" and isinstance(grouped_tags, list) and grouped_tags and not tags_arg:
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash"))
if (
grouped_table == "tag.selection"
and isinstance(grouped_tags, list)
and grouped_tags
and not tags_arg
):
file_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(result, "hash"))
)
store_name = override_store or get_field(result, "store")
path = get_field(result, "path") or get_field(result, "target")
tags = [str(t) for t in grouped_tags if t]
@@ -186,7 +210,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag_name = get_field(item, "tag_name")
if not tag_name:
continue
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_store = override_store or get_field(item, "store")
item_path = get_field(item, "path") or get_field(item, "target")
key = (str(item_hash or ""), str(item_store or ""), str(item_path or ""))
@@ -216,18 +244,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# If items are TagItems and no args, the tag to delete is the item itself
for item in items_to_process:
tags_to_delete: list[str] = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_path = get_field(item, "path") or get_field(item, "target")
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
if tags_arg:
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
tag_name = get_field(item, "tag_name")
if tag_name:
tags_to_delete = [str(tag_name)]
else:
@@ -244,7 +273,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
return 1
def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool:
def _process_deletion(
tags: list[str],
file_hash: str | None,
path: str | None,
store_name: str | None,
config: Dict[str, Any],
) -> bool:
"""Helper to execute the deletion logic for a single target."""
if not tags:
@@ -258,12 +294,16 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
if not resolved_hash and path:
try:
from SYS.utils import sha256_file
resolved_hash = sha256_file(Path(path))
except Exception:
resolved_hash = None
if not resolved_hash:
log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr)
log(
"Item does not include a usable hash (and hash could not be derived from path)",
file=sys.stderr,
)
return False
def _fetch_existing_tags() -> list[str]:
@@ -278,18 +318,23 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
if title_tags:
existing_tags = _fetch_existing_tags()
current_titles = [t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")]
current_titles = [
t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")
]
del_title_set = {t.lower() for t in title_tags}
remaining_titles = [t for t in current_titles if t.lower() not in del_title_set]
if current_titles and not remaining_titles:
log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr)
log(
'Cannot delete the last title: tag. Add a replacement title first (add-tags "title:new title").',
file=sys.stderr,
)
return False
try:
backend = Store(config)[store_name]
ok = backend.delete_tag(resolved_hash, list(tags), config=config)
if ok:
preview = resolved_hash[:12] + ('' if len(resolved_hash) > 12 else '')
preview = resolved_hash[:12] + ("" if len(resolved_hash) > 12 else "")
debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.")
_refresh_tag_view_if_current(resolved_hash, store_name, path, config)
return True
@@ -302,6 +347,3 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
+52 -14
View File
@@ -29,7 +29,11 @@ class Delete_Url(Cmdlet):
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
CmdletArg(
"url",
required=False,
description="URL to remove (optional when piping url rows)",
),
],
detail=[
"- Removes URL association from file identified by hash+store",
@@ -49,7 +53,9 @@ class Delete_Url(Cmdlet):
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -57,13 +63,15 @@ class Delete_Url(Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
@@ -129,21 +137,29 @@ class Delete_Url(Cmdlet):
raw_hash = query_hash or get_field(item, "hash")
raw_store = store_override or get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item missing hash/store; skipping",
file=sys.stderr,
)
continue
normalized = normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[delete-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -152,9 +168,17 @@ class Delete_Url(Cmdlet):
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
item_urls = list(urls_from_cli)
if not item_urls:
item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
item_urls = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
if not item_urls:
ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has no url field; skipping", file=sys.stderr
)
continue
batch.setdefault(store_text, []).append((normalized, item_urls))
@@ -168,7 +192,7 @@ class Delete_Url(Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = [(h, merged[h]) for h in merged.keys()]
@@ -195,21 +219,35 @@ class Delete_Url(Cmdlet):
# remove the piped url row(s).
remove_set = urls_from_cli
if not remove_set:
remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
remove_set = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
_set_item_url(item, _remove_urls(existing, list(remove_set)))
ctx.emit(item)
return 0
# Single-item mode
if not urls_from_cli:
urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
urls_from_cli = [
u.strip()
for u in normalize_urls(
get_field(result, "url") or get_field(result, "source_url")
)
if str(u).strip()
]
if not urls_from_cli:
log("Error: No URL provided")
return 1
backend = storage[str(store_name)]
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
ctx.print_if_visible(
f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr
)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
+155 -40
View File
@@ -44,8 +44,12 @@ class Download_File(Cmdlet):
SharedArgs.URL,
SharedArgs.PATH,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
CmdletArg(
name="-output",
type="string",
alias="o",
description="(deprecated) Output directory (use -path instead)",
),
],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
@@ -67,7 +71,7 @@ class Download_File(Cmdlet):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in (raw_url or []):
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
@@ -99,7 +103,9 @@ class Download_File(Cmdlet):
return 1
@staticmethod
def _build_preview(raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[Any]:
def _build_preview(
raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int
) -> List[Any]:
try:
preview: List[Any] = []
preview.extend(list(raw_urls or [])[: max(0, total_items)])
@@ -212,7 +218,11 @@ class Download_File(Cmdlet):
title = str(get_field(item, "title") or "").strip() if item is not None else ""
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
table_title = (
f"Internet Archive: {title}".strip().rstrip(":")
if title
else f"Internet Archive: {identifier}"
)
try:
from result_table import ResultTable
@@ -329,7 +339,9 @@ class Download_File(Cmdlet):
config: Dict[str, Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
title_val = (
title_hint or downloaded_path.stem or "Unknown"
).strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
tag: List[str] = []
if tags_hint:
@@ -406,9 +418,13 @@ class Download_File(Cmdlet):
provider = _get_provider("telegram", config)
if provider is None:
raise DownloadError("Telegram provider not configured or not available (check telethon/app_id/api_hash)")
raise DownloadError(
"Telegram provider not configured or not available (check telethon/app_id/api_hash)"
)
sr = SearchResult(table="telegram", title=str(url), path=str(url), full_metadata={})
sr = SearchResult(
table="telegram", title=str(url), path=str(url), full_metadata={}
)
downloaded_path = None
telegram_info: Optional[Dict[str, Any]] = None
if hasattr(provider, "download_url"):
@@ -428,9 +444,15 @@ class Download_File(Cmdlet):
try:
chat_info_raw = telegram_info.get("chat")
msg_info_raw = telegram_info.get("message")
chat_info: Dict[str, Any] = chat_info_raw if isinstance(chat_info_raw, dict) else {}
msg_info: Dict[str, Any] = msg_info_raw if isinstance(msg_info_raw, dict) else {}
channel = str(chat_info.get("title") or chat_info.get("username") or "").strip()
chat_info: Dict[str, Any] = (
chat_info_raw if isinstance(chat_info_raw, dict) else {}
)
msg_info: Dict[str, Any] = (
msg_info_raw if isinstance(msg_info_raw, dict) else {}
)
channel = str(
chat_info.get("title") or chat_info.get("username") or ""
).strip()
post = msg_info.get("id")
except Exception:
channel = ""
@@ -479,7 +501,16 @@ class Download_File(Cmdlet):
p = urlparse(str(url))
h = (p.hostname or "").strip().lower()
path = (p.path or "").strip().lower()
if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")):
if "libgen" in h and any(
x in path
for x in (
"/edition.php",
"/file.php",
"/ads.php",
"/get.php",
"/series.php",
)
):
provider_name = "libgen"
except Exception:
pass
@@ -489,7 +520,9 @@ class Download_File(Cmdlet):
if provider_name == "openlibrary":
provider = get_provider("openlibrary", config)
if provider is None:
raise DownloadError("OpenLibrary provider not configured or not available")
raise DownloadError(
"OpenLibrary provider not configured or not available"
)
edition_id = self._openlibrary_edition_id_from_url(str(url))
title_hint = self._title_hint_from_url_slug(str(url))
@@ -512,7 +545,9 @@ class Download_File(Cmdlet):
# High-level steps for OpenLibrary borrow/download flow.
progress.begin_steps(5)
def _progress(kind: str, done: int, total: Optional[int], label: str) -> None:
def _progress(
kind: str, done: int, total: Optional[int], label: str
) -> None:
# kind:
# - "step": advance step text
# - "pages": update pipe percent/status
@@ -525,7 +560,9 @@ class Download_File(Cmdlet):
t = int(total) if isinstance(total, int) else 0
d = int(done) if isinstance(done, int) else 0
if t > 0:
pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
pct = int(
round((max(0, min(d, t)) / max(1, t)) * 100.0)
)
progress.set_percent(pct)
progress.set_status(f"downloading pages {d}/{t}")
else:
@@ -538,9 +575,15 @@ class Download_File(Cmdlet):
except Exception:
lbl = "download"
progress.begin_transfer(label=lbl, total=total)
progress.update_transfer(label=lbl, completed=done, total=total)
progress.update_transfer(
label=lbl, completed=done, total=total
)
try:
if isinstance(total, int) and total > 0 and int(done) >= int(total):
if (
isinstance(total, int)
and total > 0
and int(done) >= int(total)
):
progress.finish_transfer(label=lbl)
except Exception:
pass
@@ -590,12 +633,18 @@ class Download_File(Cmdlet):
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
if callable(exec_fn):
ret = exec_fn(None, ["-provider", "libgen", "-query", fallback_query], config)
ret = exec_fn(
None,
["-provider", "libgen", "-query", fallback_query],
config,
)
try:
table = pipeline_context.get_last_result_table()
items = pipeline_context.get_last_result_items()
if table is not None:
pipeline_context.set_last_result_table_overlay(table, items)
pipeline_context.set_last_result_table_overlay(
table, items
)
except Exception:
pass
@@ -606,7 +655,10 @@ class Download_File(Cmdlet):
except Exception:
pass
log("[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr)
log(
"[download-file] OpenLibrary URL could not be downloaded",
file=sys.stderr,
)
continue
# Generic provider URL handler (if a provider implements `download_url`).
@@ -734,11 +786,18 @@ class Download_File(Cmdlet):
full_metadata = get_field(item, "full_metadata")
target = get_field(item, "path") or get_field(item, "url")
if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder":
if (
str(table or "").lower() == "alldebrid"
and str(media_kind or "").lower() == "folder"
):
magnet_id = None
if isinstance(full_metadata, dict):
magnet_id = full_metadata.get("magnet_id")
if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
if (
magnet_id is None
and isinstance(target, str)
and target.lower().startswith("alldebrid:magnet:")
):
try:
magnet_id = int(target.split(":")[-1])
except Exception:
@@ -748,12 +807,20 @@ class Download_File(Cmdlet):
provider = get_search_provider("alldebrid", config)
if provider is not None:
try:
files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)})
files = provider.search(
"*",
limit=10_000,
filters={"view": "files", "magnet_id": int(magnet_id)},
)
except Exception:
files = []
# If the magnet isn't ready, provider.search returns a single not-ready folder row.
if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
if (
files
and len(files) == 1
and getattr(files[0], "media_kind", "") == "folder"
):
detail = getattr(files[0], "detail", "")
log(
f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})",
@@ -761,7 +828,9 @@ class Download_File(Cmdlet):
)
else:
for sr in files:
expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr)
expanded_items.append(
sr.to_dict() if hasattr(sr, "to_dict") else sr
)
continue
expanded_items.append(item)
@@ -784,7 +853,9 @@ class Download_File(Cmdlet):
get_search_provider = registry.get("get_search_provider")
SearchResult = registry.get("SearchResult")
expanded_items = self._expand_provider_items(piped_items=piped_items, registry=registry, config=config)
expanded_items = self._expand_provider_items(
piped_items=piped_items, registry=registry, config=config
)
for item in expanded_items:
try:
@@ -800,7 +871,11 @@ class Download_File(Cmdlet):
tags_list = None
full_metadata = get_field(item, "full_metadata")
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
if (
(not full_metadata)
and isinstance(item, dict)
and isinstance(item.get("extra"), dict)
):
extra_md = item["extra"].get("full_metadata")
if isinstance(extra_md, dict):
full_metadata = extra_md
@@ -832,7 +907,9 @@ class Download_File(Cmdlet):
if isinstance(md, dict):
magnet_name = md.get("magnet_name") or md.get("folder")
if not magnet_name:
magnet_name = str(get_field(item, "detail") or "").strip() or None
magnet_name = (
str(get_field(item, "detail") or "").strip() or None
)
magnet_dir_name = _sf(str(magnet_name)) if magnet_name else ""
@@ -845,7 +922,9 @@ class Download_File(Cmdlet):
base_tail_norm = _sf(base_tail).lower() if base_tail.strip() else ""
magnet_dir_norm = magnet_dir_name.lower() if magnet_dir_name else ""
if magnet_dir_name and (not base_tail_norm or base_tail_norm != magnet_dir_norm):
if magnet_dir_name and (
not base_tail_norm or base_tail_norm != magnet_dir_norm
):
output_dir = Path(output_dir) / magnet_dir_name
relpath = None
@@ -855,7 +934,11 @@ class Download_File(Cmdlet):
relpath = md["file"].get("_relpath")
if relpath:
parts = [p for p in str(relpath).replace("\\", "/").split("/") if p and p not in {".", ".."}]
parts = [
p
for p in str(relpath).replace("\\", "/").split("/")
if p and p not in {".", ".."}
]
# If the provider relpath already includes the magnet folder name as a
# root directory (common), strip it to prevent double nesting.
@@ -881,7 +964,11 @@ class Download_File(Cmdlet):
provider_sr = sr
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
if (
downloaded_path is None
and attempted_provider_download
and str(table or "").lower() == "openlibrary"
):
availability = None
reason = None
if isinstance(full_metadata, dict):
@@ -898,7 +985,10 @@ class Download_File(Cmdlet):
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
fallback_query = title_text
@@ -921,7 +1011,9 @@ class Download_File(Cmdlet):
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()
if table_obj is not None:
pipeline_context.set_last_result_table_overlay(table_obj, items_obj)
pipeline_context.set_last_result_table_overlay(
table_obj, items_obj
)
except Exception:
pass
@@ -935,7 +1027,11 @@ class Download_File(Cmdlet):
continue
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
if (
downloaded_path is None
and isinstance(target, str)
and target.startswith("http")
):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
@@ -946,7 +1042,9 @@ class Download_File(Cmdlet):
file=sys.stderr,
)
continue
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
@@ -958,7 +1056,10 @@ class Download_File(Cmdlet):
downloaded_path = self._path_from_download_result(result_obj)
if downloaded_path is None:
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
log(
f"Cannot download item (no provider handler / unsupported target): {title or target}",
file=sys.stderr,
)
continue
# Allow providers to add/enrich tags and metadata during download.
@@ -1038,11 +1139,19 @@ class Download_File(Cmdlet):
# UX: In piped mode, allow a single positional arg to be the destination directory.
# Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo"
if had_piped_input and raw_url and len(raw_url) == 1 and (not parsed.get("path")) and (not parsed.get("output")):
if (
had_piped_input
and raw_url
and len(raw_url) == 1
and (not parsed.get("path"))
and (not parsed.get("output"))
):
candidate = str(raw_url[0] or "").strip()
low = candidate.lower()
looks_like_url = low.startswith(("http://", "https://", "ftp://"))
looks_like_provider = low.startswith(("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:"))
looks_like_provider = low.startswith(
("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")
)
looks_like_windows_path = (
(len(candidate) >= 2 and candidate[1] == ":")
or candidate.startswith("\\\\")
@@ -1058,7 +1167,9 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
quiet_mode = (
bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
)
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
@@ -1082,7 +1193,9 @@ class Download_File(Cmdlet):
total_items = self._safe_total_items(raw_url, piped_items)
preview = self._build_preview(raw_url, piped_items, total_items)
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
progress.ensure_local_ui(
label="download-file", total_items=total_items, items_preview=preview
)
registry = self._load_provider_registry()
@@ -1155,6 +1268,7 @@ class Download_File(Cmdlet):
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
@@ -1173,6 +1287,7 @@ class Download_File(Cmdlet):
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
+321 -91
View File
@@ -35,6 +35,9 @@ import pipeline as pipeline_context
from result_table import ResultTable
from rich.prompt import Confirm
from rich_display import stderr_console as get_stderr_console
from . import _shared as sh
QueryArg = sh.QueryArg
from tool.ytdlp import YtDlpTool
@@ -53,13 +56,21 @@ get_field = sh.get_field
def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
ui = None
try:
ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
ui = (
pipeline_context.get_live_progress()
if hasattr(pipeline_context, "get_live_progress")
else None
)
except Exception:
ui = None
pipe_idx: int = 0
try:
stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
stage_ctx = (
pipeline_context.get_stage_context()
if hasattr(pipeline_context, "get_stage_context")
else None
)
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
@@ -207,7 +218,9 @@ def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]:
return f"{minutes:02d}:{secs:02d}"
lines: List[str] = []
for start_s, end_s, title in sorted(rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2])):
for start_s, end_s, title in sorted(
rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2])
):
if end_s is not None and end_s > start_s:
prefix = f"{_tc(start_s)}-{_tc(end_s)}"
else:
@@ -353,7 +366,9 @@ def _download_with_sections_via_cli(
if not sections_list:
return "", {}
session_id = hashlib.md5((url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12]
session_id = hashlib.md5(
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
total_sections = len(sections_list)
@@ -457,10 +472,12 @@ def _download_with_sections_via_cli(
else:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as exc:
stderr_text = (exc.stderr or "")
stderr_text = exc.stderr or ""
tail = "\n".join(stderr_text.splitlines()[-12:]).strip()
details = f"\n{tail}" if tail else ""
raise DownloadError(f"yt-dlp failed for section {section} (exit {exc.returncode}){details}") from exc
raise DownloadError(
f"yt-dlp failed for section {section} (exit {exc.returncode}){details}"
) from exc
except Exception as exc:
raise DownloadError(f"yt-dlp failed for section {section}: {exc}") from exc
@@ -518,7 +535,9 @@ def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dic
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
def _resolve_entries_and_paths(info: Dict[str, Any], output_dir: Path) -> List[tuple[Dict[str, Any], Path]]:
def _resolve_entries_and_paths(
info: Dict[str, Any], output_dir: Path
) -> List[tuple[Dict[str, Any], Path]]:
resolved: List[tuple[Dict[str, Any], Path]] = []
seen: set[str] = set()
for entry in _iter_download_entries(info):
@@ -558,7 +577,6 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
return None
def _progress_callback(status: Dict[str, Any]) -> None:
"""Simple progress callback using logger."""
event = status.get("status")
@@ -582,7 +600,13 @@ def _progress_callback(status: Dict[str, Any]) -> None:
return
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, cookiefile: Optional[str] = None) -> Optional[Dict[str, Any]]:
def probe_url(
url: str,
no_playlist: bool = False,
timeout_seconds: int = 15,
*,
cookiefile: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
@@ -710,7 +734,9 @@ def download_media(
# Skip probe for playlists with item selection (probe can hang on large playlists)
# Just proceed straight to download which will handle item selection
if opts.playlist_items:
debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
debug(
f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download"
)
probe_result = {"url": opts.url} # Minimal probe result
else:
probe_cookiefile = None
@@ -720,7 +746,9 @@ def download_media(
except Exception:
probe_cookiefile = None
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile)
probe_result = probe_url(
opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile
)
if probe_result is None:
msg = "yt-dlp could not detect media for this URL; use download-file for direct downloads"
@@ -751,7 +779,9 @@ def download_media(
if not opts.quiet:
if ytdl_options.get("download_sections"):
debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}")
debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")
debug(
f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}"
)
# Use subprocess when download_sections are present (Python API doesn't support them properly)
session_id = None
@@ -809,7 +839,7 @@ def download_media(
if opts.clip_sections and session_id:
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
# Also includes sidecars like "{session_id}_1.en.vtt".
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)')
section_pattern = re.compile(rf"^{re.escape(session_id)}_(\d+)")
matching_files = [f for f in files if section_pattern.search(f.name)]
if matching_files:
@@ -882,7 +912,7 @@ def download_media(
def _tail(name: str) -> str:
try:
if name.startswith(prefix):
return name[len(prefix):]
return name[len(prefix) :]
except Exception:
pass
# Fallback: keep just the last suffix.
@@ -896,14 +926,18 @@ def download_media(
new_media_name = f"{media_hash}{_tail(media_file.name)}"
new_media_path = opts.output_dir / new_media_name
if new_media_path.exists() and new_media_path != media_file:
debug(f"File with hash {media_hash} already exists, using existing file.")
debug(
f"File with hash {media_hash} already exists, using existing file."
)
try:
media_file.unlink()
except OSError:
pass
else:
media_file.rename(new_media_path)
debug(f"Renamed section file: {media_file.name} -> {new_media_name}")
debug(
f"Renamed section file: {media_file.name} -> {new_media_name}"
)
renamed_media_files.append(new_media_path)
except Exception as e:
debug(f"Failed to rename section media file {media_file.name}: {e}")
@@ -922,11 +956,17 @@ def download_media(
pass
else:
sub_file.rename(new_sub_path)
debug(f"Renamed section file: {sub_file.name} -> {new_sub_name}")
debug(
f"Renamed section file: {sub_file.name} -> {new_sub_name}"
)
except Exception as e:
debug(f"Failed to rename section subtitle file {sub_file.name}: {e}")
debug(
f"Failed to rename section subtitle file {sub_file.name}: {e}"
)
media_path = renamed_media_files[0] if renamed_media_files else matching_files[0]
media_path = (
renamed_media_files[0] if renamed_media_files else matching_files[0]
)
media_paths = renamed_media_files if renamed_media_files else None
if not opts.quiet:
count = len(media_paths) if isinstance(media_paths, list) else 1
@@ -960,11 +1000,11 @@ def download_media(
# For section downloads, create tags with the title and build proper info dict
tags = []
title = ''
title = ""
if first_section_info:
title = first_section_info.get('title', '')
title = first_section_info.get("title", "")
if title:
tags.append(f'title:{title}')
tags.append(f"title:{title}")
debug(f"Added title tag for section download: {title}")
# Build info dict - always use extracted title if available, not hash
@@ -974,7 +1014,7 @@ def download_media(
info_dict = {
"id": media_path.stem,
"title": title or media_path.stem,
"ext": media_path.suffix.lstrip(".")
"ext": media_path.suffix.lstrip("."),
}
return DownloadMediaResult(
@@ -1079,11 +1119,7 @@ def download_media(
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
source_url = (
entry.get("webpage_url")
or entry.get("original_url")
or entry.get("url")
)
source_url = entry.get("webpage_url") or entry.get("original_url") or entry.get("url")
if not opts.quiet:
debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
@@ -1163,16 +1199,33 @@ class Download_Media(Cmdlet):
SharedArgs.URL,
SharedArgs.QUERY,
CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(
name="clip",
name="format",
type="string",
description="Extract time range(s) or keyed spec (e.g., clip:3m4s-3m14s,item:2-3)",
alias="fmt",
description="Explicit yt-dlp format selector",
),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
SharedArgs.PATH
QueryArg(
"clip",
key="clip",
aliases=["range", "section", "sections"],
type="string",
required=False,
description=(
"Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). "
"Comma-separated values supported."
),
query_only=True,
),
CmdletArg(
name="item", type="string", description="Item selection for playlists/formats"
),
SharedArgs.PATH,
],
detail=[
"Download media from streaming sites using yt-dlp.",
"For direct file downloads, use download-file.",
],
detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
exec=self.run,
)
self.register()
@@ -1192,7 +1245,7 @@ class Download_Media(Cmdlet):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in (raw_url or []):
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
@@ -1233,15 +1286,46 @@ class Download_Media(Cmdlet):
if not query_spec:
return {}
try:
return self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
if not keyed:
return {}
# Normalize aliases so users can write shorter/alternate keys.
# Note: download-media uses a comma-separated keyed spec language inside -query.
def _alias(src: str, dest: str) -> None:
    """Fold the values stored under alias key ``src`` into key ``dest``.

    Operates in place on the ``keyed`` mapping of the enclosing scope:
    when ``src`` holds a non-empty value, its items are appended to the
    entry for ``dest`` (an empty list is created if ``dest`` is missing)
    and the ``src`` entry is then removed. Each step is best-effort; any
    exception is swallowed so one bad key cannot abort query parsing.
    """
    try:
        alias_values = keyed.get(src)
    except Exception:
        alias_values = None

    if alias_values:
        # Merge first; even if merging fails the alias key is still dropped below.
        try:
            target_bucket = keyed.setdefault(dest, [])
            target_bucket.extend(list(alias_values))
        except Exception:
            pass

        try:
            keyed.pop(src, None)
        except Exception:
            pass
for src in ("range", "ranges", "section", "sections"):
_alias(src, "clip")
for src in ("fmt", "f"):
_alias(src, "format")
for src in ("aud", "a"):
_alias(src, "audio")
return keyed
except Exception:
return {}
@staticmethod
def _extract_hash_override(query_spec: Optional[str], query_keyed: Dict[str, List[str]]) -> Optional[str]:
def _extract_hash_override(
query_spec: Optional[str], query_keyed: Dict[str, List[str]]
) -> Optional[str]:
try:
hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
hash_candidate = (hash_values[-1] if hash_values else None)
hash_candidate = hash_values[-1] if hash_values else None
if hash_candidate:
return sh.parse_single_hash_query(f"hash:{hash_candidate}")
@@ -1276,7 +1360,7 @@ class Download_Media(Cmdlet):
if clip_spec:
# Support keyed clip syntax:
# -clip "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
# -query "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
clip_values.extend(keyed.get("clip", []) or [])
item_values.extend(keyed.get("item", []) or [])
@@ -1305,8 +1389,10 @@ class Download_Media(Cmdlet):
hydrus_available = True
try:
from Store import Store
storage = Store(config=config or {}, suppress_debug=True)
from API.HydrusNetwork import is_hydrus_available
hydrus_available = bool(is_hydrus_available(config or {}))
except Exception:
storage = None
@@ -1394,7 +1480,9 @@ class Download_Media(Cmdlet):
ytdlp_tool=ytdlp_tool,
)
if not fmts:
raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index")
raise ValueError(
"Unable to list formats for the URL; cannot resolve numeric format index"
)
candidate_formats = [f for f in fmts if self._is_browseable_format(f)]
filtered_formats = candidate_formats if candidate_formats else list(fmts)
@@ -1454,7 +1542,9 @@ class Download_Media(Cmdlet):
return f"bv*[height<={height}]+ba"
@staticmethod
def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
def _canonicalize_url_for_storage(
*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]
) -> str:
# Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects).
# Fall back to the requested URL if probing fails.
# Important: when playlist item selection is used, avoid probing (can hang on large playlists).
@@ -1581,7 +1671,12 @@ class Download_Media(Cmdlet):
except Exception:
continue
if backend_hits:
url_matches.extend([dict(x) if isinstance(x, dict) else {"title": str(x)} for x in backend_hits])
url_matches.extend(
[
dict(x) if isinstance(x, dict) else {"title": str(x)}
for x in backend_hits
]
)
if len(url_matches) >= 25:
url_matches = url_matches[:25]
@@ -1608,8 +1703,12 @@ class Download_Media(Cmdlet):
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
cached_cmd = pipeline_context.load_value(
"preflight.url_duplicates.command", default=""
)
cached_decision = pipeline_context.load_value(
"preflight.url_duplicates.continue", default=None
)
except Exception:
cached_cmd = ""
cached_decision = None
@@ -1618,7 +1717,9 @@ class Download_Media(Cmdlet):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
pipeline_context.request_pipeline_stop(
reason="duplicate-url declined", exit_code=0
)
except Exception:
pass
return False
@@ -1627,7 +1728,9 @@ class Download_Media(Cmdlet):
results_list: List[Dict[str, Any]] = []
for item in url_matches:
if "title" not in item:
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
item["title"] = (
item.get("name") or item.get("target") or item.get("path") or "Result"
)
# Keep the full payload for history/inspection, but display a focused table.
# Use shared extractors so Ext/Size/Store/Hash remain consistent everywhere.
@@ -1637,7 +1740,9 @@ class Download_Media(Cmdlet):
build_display_row = None # type: ignore
if callable(build_display_row):
display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
display_row = build_display_row(
item, keys=["title", "store", "hash", "ext", "size"]
)
else:
display_row = {
"title": item.get("title"),
@@ -1669,7 +1774,9 @@ class Download_Media(Cmdlet):
with cm:
get_stderr_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
answered_yes = bool(
Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())
)
# Cache decision for the duration of this pipeline/command.
if in_pipeline:
@@ -1692,7 +1799,9 @@ class Download_Media(Cmdlet):
if not answered_yes:
if in_pipeline and used_suspend:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
pipeline_context.request_pipeline_stop(
reason="duplicate-url declined", exit_code=0
)
except Exception:
pass
return False
@@ -1730,8 +1839,12 @@ class Download_Media(Cmdlet):
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
cached_cmd = pipeline_context.load_value(
"preflight.url_duplicates.command", default=""
)
cached_decision = pipeline_context.load_value(
"preflight.url_duplicates.continue", default=None
)
except Exception:
cached_cmd = ""
cached_decision = None
@@ -1740,7 +1853,9 @@ class Download_Media(Cmdlet):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
pipeline_context.request_pipeline_stop(
reason="duplicate-url declined", exit_code=0
)
except Exception:
pass
return False
@@ -1928,7 +2043,13 @@ class Download_Media(Cmdlet):
seen_pairs.add((original_url, str(backend_name)))
matched_urls.add(original_url)
hit = backend_hits[0]
title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
title = (
hit.get("title")
or hit.get("name")
or hit.get("target")
or hit.get("path")
or "(exists)"
)
file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
try:
@@ -1945,7 +2066,9 @@ class Download_Media(Cmdlet):
}
if callable(build_display_row):
try:
extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
extracted = build_display_row(
hit, keys=["title", "store", "hash", "ext", "size"]
)
except Exception:
pass
# Ensure we still prefer the precomputed values for title/store/hash.
@@ -2009,7 +2132,9 @@ class Download_Media(Cmdlet):
cm = nullcontext()
with cm:
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
answered_yes = bool(
Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())
)
# Cache decision for the duration of this pipeline/command.
if in_pipeline:
@@ -2032,7 +2157,9 @@ class Download_Media(Cmdlet):
if not answered_yes:
if in_pipeline:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
pipeline_context.request_pipeline_stop(
reason="duplicate-url declined", exit_code=0
)
except Exception:
pass
return False
@@ -2056,7 +2183,9 @@ class Download_Media(Cmdlet):
# `download-media` before `merge-file`).
extractor_name = ""
try:
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower()
extractor_name = (
str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower()
)
except Exception:
extractor_name = ""
table_type: Optional[str] = None
@@ -2125,7 +2254,9 @@ class Download_Media(Cmdlet):
"detail": str(uploader or ""),
"media_kind": "playlist-item",
"playlist_index": idx,
"_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]),
"_selection_args": (
["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]
),
"url": entry_url,
"target": entry_url,
"columns": [
@@ -2163,10 +2294,11 @@ class Download_Media(Cmdlet):
args: Sequence[str],
) -> Optional[int]:
# If no -item, no explicit -format specified, and single URL, show the format table.
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
# Do NOT stop to show formats when -audio is used (auto-pick) or when clip ranges are requested via -query.
if (
mode != "audio"
and not clip_spec
and not clip_values
and not playlist_items
and not ytdl_format
and len(supported_url) == 1
@@ -2228,12 +2360,16 @@ class Download_Media(Cmdlet):
# Build the base command that will be replayed with @N selection
base_cmd = f'download-media "{url}"'
remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith('-')]
remaining_args = [
arg for arg in args if arg not in [url] and not arg.startswith("-")
]
if remaining_args:
base_cmd += ' ' + ' '.join(remaining_args)
base_cmd += " " + " ".join(remaining_args)
# Create result table for display
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table = ResultTable(
title=f"Available formats for {url}", max_columns=10, preserve_order=True
)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [url])
@@ -2311,7 +2447,9 @@ class Download_Media(Cmdlet):
selection_args: List[str] = ["-format", selection_format_id]
try:
if (not clip_spec) and clip_values:
selection_args.extend(["-clip", ",".join([v for v in clip_values if v])])
selection_args.extend(
["-query", f"clip:{','.join([v for v in clip_values if v])}"]
)
except Exception:
pass
format_dict["_selection_args"] = selection_args
@@ -2410,7 +2548,12 @@ class Download_Media(Cmdlet):
actual_format = configured
forced_single_applied = False
if forced_single_format_for_batch and forced_single_format_id and not ytdl_format and not actual_playlist_items:
if (
forced_single_format_for_batch
and forced_single_format_id
and not ytdl_format
and not actual_playlist_items
):
actual_format = forced_single_format_id
forced_single_applied = True
@@ -2433,14 +2576,20 @@ class Download_Media(Cmdlet):
)
if formats:
fmt_match = next(
(f for f in formats if str(f.get("format_id", "")) == actual_format),
(
f
for f in formats
if str(f.get("format_id", "")) == actual_format
),
None,
)
if fmt_match:
vcodec = str(fmt_match.get("vcodec", "none"))
acodec = str(fmt_match.get("acodec", "none"))
if vcodec != "none" and acodec == "none":
debug(f"Selected video-only format {actual_format}; using {actual_format}+ba for audio")
debug(
f"Selected video-only format {actual_format}; using {actual_format}+ba for audio"
)
actual_format = f"{actual_format}+ba"
except Exception:
pass
@@ -2475,7 +2624,9 @@ class Download_Media(Cmdlet):
except Exception:
detail = ""
if ("requested format is not available" in (detail or "").lower()) and mode != "audio":
if (
"requested format is not available" in (detail or "").lower()
) and mode != "audio":
if (
forced_single_format_for_batch
and forced_single_format_id
@@ -2485,7 +2636,9 @@ class Download_Media(Cmdlet):
):
attempted_single_format_fallback = True
actual_format = forced_single_format_id
debug(f"Only one format available (playlist preflight); retrying with: {actual_format}")
debug(
f"Only one format available (playlist preflight); retrying with: {actual_format}"
)
continue
formats = self._list_formats_cached(
@@ -2514,13 +2667,19 @@ class Download_Media(Cmdlet):
if selection_format_id:
attempted_single_format_fallback = True
actual_format = selection_format_id
debug(f"Only one format available; retrying with: {actual_format}")
debug(
f"Only one format available; retrying with: {actual_format}"
)
continue
if formats:
formats_to_show = formats
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table = ResultTable(
title=f"Available formats for {url}",
max_columns=10,
preserve_order=True,
)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [url])
@@ -2605,7 +2764,10 @@ class Download_Media(Cmdlet):
PipelineProgress(pipeline_context).step("awaiting selection")
log("Requested format is not available; select a working format with @N", file=sys.stderr)
log(
"Requested format is not available; select a working format with @N",
file=sys.stderr,
)
return 0
raise
@@ -2654,7 +2816,11 @@ class Download_Media(Cmdlet):
pass
try:
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
info = (
downloaded.info
if isinstance(getattr(downloaded, "info", None), dict)
else {}
)
except Exception:
info = {}
chapters_text = _format_chapters_note(info) if embed_chapters else None
@@ -2695,7 +2861,9 @@ class Download_Media(Cmdlet):
canonical_url,
hydrus_available=hydrus_available,
)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
self._apply_clip_decorations(
pipe_objects, clip_ranges, source_king_hash=source_hash
)
except Exception:
pass
@@ -2756,7 +2924,9 @@ class Download_Media(Cmdlet):
return 1
if unsupported_list:
debug(f"Skipping {len(unsupported_list)} unsupported url (use download-file for direct downloads)")
debug(
f"Skipping {len(unsupported_list)} unsupported url (use download-file for direct downloads)"
)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
@@ -2775,7 +2945,9 @@ class Download_Media(Cmdlet):
# -query "clip:1m-1m15s,2m1s-2m11s"
# -query "hash:<sha256>,clip:1m-1m15s,item:2-3"
# -query "format:audio,item:1-3" (audio-only + playlist selection)
query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None)
query_keyed = self._parse_query_keyed_spec(
str(query_spec) if query_spec is not None else None
)
# Optional: allow an explicit hash via -query "hash:<sha256>".
# This is used as the preferred king hash for multi-clip relationships.
@@ -2791,6 +2963,7 @@ class Download_Media(Cmdlet):
# QueryArgs:
# - format:audio => audio-only (highest quality audio)
# - format:<ytdlp-format> => equivalent to -format <ytdlp-format>
# - audio:true|false => audio-only mode toggle (preferred over format:audio when provided)
query_format: Optional[str] = None
try:
fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else []
@@ -2800,6 +2973,22 @@ class Download_Media(Cmdlet):
except Exception:
query_format = None
query_audio: Optional[bool] = None
try:
audio_values = query_keyed.get("audio", []) if isinstance(query_keyed, dict) else []
audio_candidate = audio_values[-1] if audio_values else None
if audio_candidate is not None:
s = str(audio_candidate).strip().lower()
if s in {"1", "true", "t", "yes", "y", "on"}:
query_audio = True
elif s in {"0", "false", "f", "no", "n", "off"}:
query_audio = False
elif s:
# Any other non-empty value implies "on".
query_audio = True
except Exception:
query_audio = None
query_wants_audio = False
if query_format:
try:
@@ -2807,8 +2996,15 @@ class Download_Media(Cmdlet):
except Exception:
query_wants_audio = False
# Explicit CLI flag wins; else query format:audio can select audio mode.
mode = "audio" if (parsed.get("audio") or query_wants_audio) else "video"
# Explicit CLI flag wins; else query audio: can select audio mode.
# Back-compat: format:audio is still supported.
audio_flag = bool(parsed.get("audio") is True)
wants_audio = audio_flag
if query_audio is not None:
wants_audio = wants_audio or bool(query_audio)
else:
wants_audio = wants_audio or bool(query_wants_audio)
mode = "audio" if wants_audio else "video"
clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items(
clip_spec=str(clip_spec) if clip_spec is not None else None,
@@ -2825,9 +3021,13 @@ class Download_Media(Cmdlet):
except Exception:
pass
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
quiet_mode = (
bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
)
storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {})
storage, hydrus_available = self._init_storage(
config if isinstance(config, dict) else {}
)
# Check if we need to show format selection
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
@@ -2866,7 +3066,9 @@ class Download_Media(Cmdlet):
# This allows non-interactive invocation from shells (PowerShell treats '@' specially).
if query_format and not query_wants_audio:
try:
idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
idx_fmt = self._format_id_for_query_index(
query_format, candidate_url, formats_cache, ytdlp_tool
)
except ValueError as e:
log(f"Error parsing format selection: {e}", file=sys.stderr)
return 1
@@ -2882,25 +3084,39 @@ class Download_Media(Cmdlet):
# If we printed a format table, give a quick hint for non-interactive selection.
try:
last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None
last_table = (
pipeline_context.get_last_result_table()
if hasattr(pipeline_context, "get_last_result_table")
else None
)
if hasattr(last_table, "rows") and getattr(last_table, "rows", None):
# Build user-friendly examples using the base command we already constructed
sample_index = 1
sample_fmt_id = None
try:
sample_row = last_table.rows[0]
sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None
sample_fmt_id = (
sample_row._full_metadata.get("item_selector")
if getattr(sample_row, "_full_metadata", None)
else None
)
except Exception:
sample_fmt_id = None
try:
# Use single quotes inside the outer quotes so PowerShell doesn't interpret the pipe character
sample_pipeline = base_cmd.replace(f'"{candidate_url}"', f"'{candidate_url}'")
sample_pipeline = base_cmd.replace(
f'"{candidate_url}"', f"'{candidate_url}'"
)
hint = (
"To select non-interactively, re-run with an explicit format: "
"e.g. mm \"{pipeline} -format {fmt} | add-file -store <store>\" or "
'e.g. mm "{pipeline} -format {fmt} | add-file -store <store>" or '
"mm \"{pipeline} -query 'format:{index}' | add-file -store <store>\""
).format(pipeline=sample_pipeline, fmt=sample_fmt_id or "<format_id>", index=sample_index)
).format(
pipeline=sample_pipeline,
fmt=sample_fmt_id or "<format_id>",
index=sample_index,
)
log(hint, file=sys.stderr)
except Exception:
pass
@@ -2925,7 +3141,6 @@ class Download_Media(Cmdlet):
# discover it once and force it for every item. This avoids per-item failures
# and per-item --list-formats calls (e.g. Bandcamp albums).
forced_single_format_id: Optional[str] = None
forced_single_format_for_batch = False
if len(supported_url) > 1 and not playlist_items and not ytdl_format:
@@ -3066,7 +3281,11 @@ class Download_Media(Cmdlet):
)
except Exception:
unit_match = None
if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
if (
unit_match
and unit_match.group(0).strip()
and any(unit_match.group(g) for g in ("h", "m", "s"))
):
try:
hours = int(unit_match.group("h") or 0)
minutes = int(unit_match.group("m") or 0)
@@ -3170,9 +3389,13 @@ class Download_Media(Cmdlet):
ranges.append(f"{start_s}-{end_s}")
return ",".join(ranges) if ranges else None
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
def _build_pipe_object(
self, download_result: Any, url: str, opts: DownloadOptions
) -> Dict[str, Any]:
"""Create a PipeObject-compatible dict from a DownloadMediaResult."""
info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
info: Dict[str, Any] = (
download_result.info if isinstance(download_result.info, dict) else {}
)
media_path = Path(download_result.path)
hash_value = download_result.hash_value or self._compute_file_hash(media_path)
title = info.get("title") or media_path.stem
@@ -3207,7 +3430,9 @@ class Download_Media(Cmdlet):
"is_temp": True,
"ytdl_format": getattr(opts, "ytdl_format", None),
# download_mode removed (deprecated), keep media_kind
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
"store": getattr(opts, "storage_name", None)
or getattr(opts, "storage_location", None)
or "PATH",
"media_kind": "video" if opts.mode == "video" else "audio",
}
@@ -3266,7 +3491,11 @@ class Download_Media(Cmdlet):
except Exception:
pass
try:
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
if (
HydrusNetwork is not None
and isinstance(backend, HydrusNetwork)
and not hydrus_available
):
continue
except Exception:
pass
@@ -3367,6 +3596,7 @@ class Download_Media(Cmdlet):
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
+49 -18
View File
@@ -17,6 +17,7 @@ from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from . import _shared as sh
class Download_Torrent(sh.Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
@@ -27,10 +28,29 @@ class Download_Torrent(sh.Cmdlet):
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
sh.CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
sh.CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
sh.CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
sh.CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
sh.CmdletArg(
name="magnet",
type="string",
required=False,
description="Magnet link or .torrent file/URL",
variadic=True,
),
sh.CmdletArg(
name="output",
type="string",
description="Output directory for downloaded files",
),
sh.CmdletArg(
name="wait",
type="float",
description="Wait time (seconds) for magnet processing timeout",
),
sh.CmdletArg(
name="background",
type="flag",
alias="bg",
description="Start download in background",
),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
@@ -51,7 +71,10 @@ class Download_Torrent(sh.Cmdlet):
except Exception:
api_key = None
if not api_key:
log("AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", file=sys.stderr)
log(
"AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)",
file=sys.stderr,
)
return 1
for magnet_url in magnet_args:
if background_mode:
@@ -64,7 +87,9 @@ class Download_Torrent(sh.Cmdlet):
magnet_id = self._submit_magnet(worker_id, magnet_url, api_key)
if magnet_id <= 0:
continue
self._start_background_magnet_worker(worker_id, magnet_id, output_dir, api_key, wait_timeout)
self._start_background_magnet_worker(
worker_id, magnet_id, output_dir, api_key, wait_timeout
)
log(f"⧗ Torrent processing started (ID: {magnet_id})")
return 0
@@ -80,7 +105,7 @@ class Download_Torrent(sh.Cmdlet):
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return 0
@@ -90,7 +115,9 @@ class Download_Torrent(sh.Cmdlet):
log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr)
return 0
def _start_background_magnet_worker(self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int) -> None:
def _start_background_magnet_worker(
self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int
) -> None:
thread = threading.Thread(
target=self._download_magnet_worker,
args=(worker_id, magnet_id, output_dir, api_key, wait_timeout),
@@ -119,7 +146,7 @@ class Download_Torrent(sh.Cmdlet):
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -129,13 +156,13 @@ class Download_Torrent(sh.Cmdlet):
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url and file_name:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -154,20 +181,22 @@ class Download_Torrent(sh.Cmdlet):
) -> None:
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
# Poll for ready status (simplified)
import time
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -176,13 +205,13 @@ class Download_Torrent(sh.Cmdlet):
return
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -193,8 +222,9 @@ class Download_Torrent(sh.Cmdlet):
def _download_file(url: str, dest: Path) -> None:
try:
import requests
resp = requests.get(url, stream=True)
with open(dest, 'wb') as f:
with open(dest, "wb") as f:
for chunk in resp.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
@@ -211,4 +241,5 @@ class Download_Torrent(sh.Cmdlet):
)
thread.start()
CMDLET = Download_Torrent()
+52 -22
View File
@@ -39,7 +39,7 @@ class Get_File(sh.Cmdlet):
],
detail=[
"- Exports file from storage backend to local path",
"- Uses selected item's hash, or -query \"hash:<sha256>\"",
'- Uses selected item\'s hash, or -query "hash:<sha256>"',
"- Preserves file extension and metadata",
],
exec=self.run,
@@ -66,7 +66,7 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] file_hash={file_hash} store_name={store_name}")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
@@ -92,7 +92,9 @@ class Get_File(sh.Cmdlet):
if not metadata:
log(f"Error: File metadata not found for hash {file_hash}")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
debug(
f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}"
)
def resolve_display_title() -> str:
candidates = [
@@ -119,7 +121,9 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] backend.get_file returned: {source_path}")
# Check if backend returned a URL (HydrusNetwork case)
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
if isinstance(source_path, str) and (
source_path.startswith("http://") or source_path.startswith("https://")
):
# Hydrus backend returns a URL; open it only for this explicit user action.
try:
webbrowser.open(source_path)
@@ -129,12 +133,14 @@ class Get_File(sh.Cmdlet):
debug(f"Opened in browser: {source_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": resolve_display_title() or "Opened",
})
}
)
return 0
# Otherwise treat as file path (local/folder backends)
@@ -154,13 +160,15 @@ class Get_File(sh.Cmdlet):
ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".")
self._open_file_default(source_path)
log(f"Opened: {source_path}", file=sys.stderr)
ctx.emit({
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
})
}
)
debug("[get-file] Completed successfully")
return 0
@@ -177,14 +185,18 @@ class Get_File(sh.Cmdlet):
if output_name:
filename = output_name
else:
title = (metadata.get("title") if isinstance(metadata, dict) else None) or resolve_display_title() or "export"
title = (
(metadata.get("title") if isinstance(metadata, dict) else None)
or resolve_display_title()
or "export"
)
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
if not ext.startswith("."):
ext = "." + ext
filename += ext
dest_path = self._unique_path(output_dir / filename)
@@ -196,12 +208,14 @@ class Get_File(sh.Cmdlet):
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
})
}
)
debug(f"[get-file] Completed successfully")
return 0
@@ -216,7 +230,17 @@ class Get_File(sh.Cmdlet):
if self._open_local_file_in_browser_via_http(path):
return
if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tif", ".tiff", ".svg"}:
if suffix in {
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tif",
".tiff",
".svg",
}:
# Use default web browser for images.
if self._open_image_in_default_browser(path):
return
@@ -225,9 +249,13 @@ class Get_File(sh.Cmdlet):
os.startfile(str(path)) # type: ignore[attr-defined]
return
if sys.platform == "darwin":
subprocess.Popen(["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
return
subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
@@ -282,7 +310,9 @@ class Get_File(sh.Cmdlet):
url = f"http://127.0.0.1:{port}/{quote(filename)}"
# Run server in the background.
server_thread = threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True)
server_thread = threading.Thread(
target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True
)
server_thread.start()
# Auto-shutdown after a timeout to avoid lingering servers.
@@ -326,10 +356,10 @@ class Get_File(sh.Cmdlet):
"\n".join(
[
"<!doctype html>",
"<meta charset=\"utf-8\">",
'<meta charset="utf-8">',
f"<title>{resolved.name}</title>",
"<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
f"<img src=\"{image_url}\" alt=\"{resolved.name}\">",
f'<img src="{image_url}" alt="{resolved.name}">',
]
),
encoding="utf-8",
@@ -351,13 +381,13 @@ class Get_File(sh.Cmdlet):
"""Sanitize filename by removing invalid characters."""
allowed_chars = []
for ch in str(name):
if ch.isalnum() or ch in {'-', '_', ' ', '.'}:
if ch.isalnum() or ch in {"-", "_", " ", "."}:
allowed_chars.append(ch)
else:
allowed_chars.append(' ')
allowed_chars.append(" ")
# Collapse multiple spaces
sanitized = ' '.join(''.join(allowed_chars).split())
sanitized = " ".join("".join(allowed_chars).split())
return sanitized or "export"
def _unique_path(self, path: Path) -> Path:
+25 -8
View File
@@ -26,7 +26,7 @@ class Get_Metadata(Cmdlet):
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-query \"hash:<sha256>\"] [-store <backend>]",
usage='get-metadata [-query "hash:<sha256>"] [-store <backend>]',
alias=["meta"],
arg=[
SharedArgs.QUERY,
@@ -57,6 +57,7 @@ class Get_Metadata(Cmdlet):
if isinstance(explicit, str):
try:
import datetime as _dt
return int(_dt.datetime.fromisoformat(explicit).timestamp())
except Exception:
pass
@@ -70,14 +71,24 @@ class Get_Metadata(Cmdlet):
return ""
try:
import datetime as _dt
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return ""
@staticmethod
def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int],
dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str],
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
def _build_table_row(
title: str,
store: str,
path: str,
mime: str,
size_bytes: Optional[int],
dur_seconds: Optional[int],
imported_ts: Optional[int],
url: list[str],
hash_value: Optional[str],
pages: Optional[int] = None,
) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
size_int: Optional[int] = None
@@ -156,7 +167,7 @@ class Get_Metadata(Cmdlet):
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
# Get hash and store from parsed args or result
@@ -164,7 +175,7 @@ class Get_Metadata(Cmdlet):
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
if not storage_source:
@@ -174,6 +185,7 @@ class Get_Metadata(Cmdlet):
# Use storage backend to get metadata
try:
from Store import Store
storage = Store(config)
backend = storage[storage_source]
@@ -181,7 +193,10 @@ class Get_Metadata(Cmdlet):
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"No metadata found for hash {file_hash[:8]}... in {storage_source}", file=sys.stderr)
log(
f"No metadata found for hash {file_hash[:8]}... in {storage_source}",
file=sys.stderr,
)
return 1
# Extract title from tags if available
@@ -224,7 +239,9 @@ class Get_Metadata(Cmdlet):
if len(nums) == 2:
duration_seconds = float(nums[0] * 60 + nums[1])
else:
duration_seconds = float(nums[0] * 3600 + nums[1] * 60 + nums[2])
duration_seconds = float(
nums[0] * 3600 + nums[1] * 60 + nums[2]
)
else:
duration_seconds = None
pages = metadata.get("pages")
+8 -5
View File
@@ -25,7 +25,7 @@ class Get_Note(Cmdlet):
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-query \"hash:<sha256>\"]",
usage='get-note -store <store> [-query "hash:<sha256>"]',
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
@@ -43,7 +43,9 @@ class Get_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +78,10 @@ class Get_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[get_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -144,5 +149,3 @@ class Get_Note(Cmdlet):
CMDLET = Get_Note()
+117 -49
View File
@@ -29,7 +29,7 @@ from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-query \"hash:<sha256>\"]",
usage='get-relationship [-query "hash:<sha256>"]',
alias=[],
arg=[
SharedArgs.QUERY,
@@ -40,6 +40,7 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(_args):
@@ -64,9 +65,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
i += 1
override_hash: str | None = sh.parse_single_hash_query(override_query) if override_query else None
override_hash: str | None = (
sh.parse_single_hash_query(override_query) if override_query else None
)
if override_query and not override_hash:
log("get-relationship requires -query \"hash:<sha256>\"", file=sys.stderr)
log('get-relationship requires -query "hash:<sha256>"', file=sys.stderr)
return 1
# Handle @N selection which creates a list
@@ -75,7 +78,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -query \"hash:<sha256>\"", file=sys.stderr)
log(
'get-relationship expects a single item; select one row (e.g. @1) or pass -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
else:
result = result[0]
@@ -87,9 +93,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
# NOTE(review): this span comes from a rendered diff. Each one-line `if`
# below appears to be the pre-commit text and the parenthesized `if (...)`
# right after it the reformatted replacement; the condition is the same.
# Scan what has been collected so far in the enclosing function's
# found_relationships list.
for existing in found_relationships:
# Duplicate by hash: compared as lowercased strings, and only when the
# new entry actually carries a truthy "hash".
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
if (
entry.get("hash")
and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower()
):
return
# Duplicate by path: same lowercased-string comparison, only when the
# new entry carries a truthy "path".
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
if (
entry.get("path")
and str(existing.get("path", "")).lower() == str(entry["path"]).lower()
):
return
# Reached only when no existing entry matched by hash or by path.
found_relationships.append(entry)
@@ -98,10 +110,18 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not store_name:
store_name = get_field(result, "store")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if not source_title or source_title == "Unknown":
source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")
source_title = (
get_field(result, "title")
or get_field(result, "name")
or (hash_hex[:16] + "..." if hash_hex else "Unknown")
)
local_db_checked = False
@@ -113,7 +133,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Folder store relationships
# IMPORTANT: only treat the Folder backend as a local DB store.
# Other backends may expose a location() method but are not SQLite folder stores.
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
storage_path = Path(str(backend.location()))
with API_folder_store(storage_path) as db:
local_db_checked = True
@@ -142,7 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not related_hash or related_hash == hash_hex:
continue
entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
entry_type = (
"king" if str(rel_type).lower() == "alt" else str(rel_type)
)
if entry_type == "king":
king_hashes.append(related_hash)
@@ -156,13 +182,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
_add_relationship({
_add_relationship(
{
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
})
}
)
# Reverse relationships (alts pointing to this hash)
try:
@@ -187,13 +215,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
_add_relationship({
_add_relationship(
{
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
})
}
)
# Siblings (alts that share the same king)
for king_hash in king_hashes:
@@ -218,13 +248,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
_add_relationship({
_add_relationship(
{
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
})
}
)
except Exception as e:
log(f"Error checking store relationships: {e}", file=sys.stderr)
@@ -234,7 +266,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# But if the file is also in Hydrus, we might want those too.
# Let's try Hydrus if we have a hash.
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if hash_hex and not local_db_checked:
try:
@@ -274,7 +310,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if backend_obj is not None and hasattr(backend_obj, "get_tag"):
try:
tag_result = backend_obj.get_tag(h)
tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
tags = (
tag_result[0]
if isinstance(tag_result, tuple) and tag_result
else tag_result
)
if isinstance(tags, list):
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
@@ -308,10 +348,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if isinstance(storage, dict):
for group in storage.values():
if isinstance(group, list):
tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in group if isinstance(x, str)]
)
display = svc_data.get("display_tags")
if isinstance(display, list):
tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in display if isinstance(x, str)]
)
flat = meta.get("tags_flat")
if isinstance(flat, list):
tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
@@ -353,16 +397,25 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Some Hydrus responses provide a direct king hash under the 'king' key.
if key == "king":
king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
king_hash = (
normalize_hash(rel_value)
if isinstance(rel_value, str)
else None
)
if king_hash and king_hash != hash_hex:
if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
found_relationships.append({
if not any(
str(r.get("hash", "")).lower() == king_hash
for r in found_relationships
):
found_relationships.append(
{
"hash": king_hash,
"type": "king",
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
})
}
)
continue
rel_name = rel_map.get(key, f"type-{key}")
@@ -370,29 +423,43 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# The relationship value is typically a list of hashes.
if isinstance(rel_value, list):
for rel_hash in rel_value:
rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
rel_hash_norm = (
normalize_hash(rel_hash)
if isinstance(rel_hash, str)
else None
)
if not rel_hash_norm or rel_hash_norm == hash_hex:
continue
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
}
)
# Defensive: sometimes the API may return a single hash string.
elif isinstance(rel_value, str):
rel_hash_norm = normalize_hash(rel_value)
if rel_hash_norm and rel_hash_norm != hash_hex:
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
}
)
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
@@ -402,6 +469,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from rich.panel import Panel
from rich_display import stdout_console
title = source_title or (hash_hex[:16] + "..." if hash_hex else "Item")
stdout_console().print(Panel(f"{title} has no relationships", title="Relationships"))
except Exception:
@@ -414,44 +482,46 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Sort by type then title
# Custom sort order: King first, then Derivative, then others
def type_sort_key(item):
# Map a relationship entry's "type" to a sort rank:
# king -> 0, derivative -> 1, alternative/alternate/alt -> 2,
# duplicate -> 3, anything else -> 4.
# The type is lowercased first, so the match ignores case.
# NOTE(review): this span comes from a rendered diff. Each single-quoted
# line appears to be the pre-commit text and the double-quoted line after
# it the replacement; they differ in quote style only.
t = item['type'].lower()
if t == 'king':
t = item["type"].lower()
if t == "king":
return 0
elif t == 'derivative':
elif t == "derivative":
return 1
elif t in {'alternative', 'alternate', 'alt'}:
elif t in {"alternative", "alternate", "alt"}:
return 2
elif t == 'duplicate':
elif t == "duplicate":
return 3
else:
# Unrecognized types share the last rank.
return 4
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
found_relationships.sort(key=lambda x: (type_sort_key(x), x["title"]))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
row.add_column("Type", item["type"].title())
row.add_column("Title", item["title"])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Store", item['store'])
row.add_column("Store", item["store"])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"store": item['store']
"title": item["title"],
"hash": item["hash"],
"file_hash": item["hash"],
"relationship_type": item["type"],
"store": item["store"],
}
# Target is always hash in store/hash-first mode
res_obj["target"] = item['hash']
res_obj["target"] = item["hash"]
pipeline_results.append(res_obj)
# Set selection args
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
table.set_row_selection_args(
i, ["-store", str(item["store"]), "-query", f"hash:{item['hash']}"]
)
ctx.set_last_result_table(table, pipeline_results)
from rich_display import stdout_console
@@ -463,5 +533,3 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET.exec = _run
CMDLET.register()
+270 -141
View File
@@ -16,6 +16,7 @@ from SYS.logger import log, debug
try:
from Provider.openlibrary import OpenLibrary
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
_ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
except Exception:
@@ -72,6 +73,7 @@ def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
@@ -101,6 +103,7 @@ def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
@@ -119,6 +122,7 @@ def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
@@ -160,7 +164,9 @@ def _resolve_candidate_urls_for_item(
if normalize_urls:
urls.extend(normalize_urls(backend_urls))
else:
urls.extend([str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()])
urls.extend(
[str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()]
)
except Exception:
pass
@@ -173,7 +179,9 @@ def _resolve_candidate_urls_for_item(
else:
raw = meta.get("url")
if isinstance(raw, list):
urls.extend([str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()])
urls.extend(
[str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()]
)
elif isinstance(raw, str) and raw.strip():
urls.append(raw.strip())
except Exception:
@@ -238,6 +246,7 @@ def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
# Prefer a true support check when the Python module is available.
try:
from SYS.download import is_url_supported_by_ytdlp
for text in candidates:
try:
if is_url_supported_by_ytdlp(text):
@@ -255,12 +264,10 @@ _scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
# Tag item for ResultTable display and piping
from dataclasses import dataclass
@dataclass
class TagItem:
"""Tag item for display in ResultTable and piping to other cmdlet.
@@ -269,6 +276,7 @@ class TagItem:
- delete-tag @{3,4,9} (delete tags at indices 3, 4, 9)
- add-tag @"namespace:value" (add this tag)
"""
tag_name: str
tag_index: int # 1-based index for user reference
hash: Optional[str] = None
@@ -367,6 +375,8 @@ def _filter_scraped_tags(tags: List[str]) -> List[str]:
seen.add(key)
out.append(s)
return out
def _summarize_tags(tags_list: List[str], limit: int = 8) -> str:
"""Create a summary of tags for display."""
shown = [t for t in tags_list[:limit] if t]
@@ -425,11 +435,12 @@ def _rename_file_if_title_tag(media: Optional[Path], tags_added: List[str]) -> b
old_name = file_path.name
# Get file extension
suffix = file_path.suffix or ''
suffix = file_path.suffix or ""
# Sanitize title for use as filename
import re
safe_title = re.sub(r'[<>:"/\\|?*]', '', title_value).strip()
safe_title = re.sub(r'[<>:"/\\|?*]', "", title_value).strip()
if not safe_title:
return False
@@ -440,8 +451,8 @@ def _rename_file_if_title_tag(media: Optional[Path], tags_added: List[str]) -> b
return False
# Build sidecar paths BEFORE renaming the file
old_sidecar = Path(str(file_path) + '.tag')
new_sidecar = Path(str(new_file_path) + '.tag')
old_sidecar = Path(str(file_path) + ".tag")
new_sidecar = Path(str(new_file_path) + ".tag")
# Rename file
try:
@@ -484,8 +495,8 @@ def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]
# Extract title from tags
new_title = None
for tag in tags_list:
if isinstance(tag, str) and tag.lower().startswith('title:'):
new_title = tag.split(':', 1)[1].strip()
if isinstance(tag, str) and tag.lower().startswith("title:"):
new_title = tag.split(":", 1)[1].strip()
break
if not new_title or not old_path.exists():
@@ -513,14 +524,18 @@ def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]
log(f"Renamed file: {old_name}{new_name}", file=sys.stderr)
# Rename the .tag sidecar if it exists
old_tags_path = old_path.parent / (old_name + '.tag')
old_tags_path = old_path.parent / (old_name + ".tag")
if old_tags_path.exists():
new_tags_path = old_path.parent / (new_name + '.tag')
new_tags_path = old_path.parent / (new_name + ".tag")
if new_tags_path.exists():
log(f"Warning: Target sidecar already exists: {new_tags_path.name}", file=sys.stderr)
log(
f"Warning: Target sidecar already exists: {new_tags_path.name}", file=sys.stderr
)
else:
old_tags_path.rename(new_tags_path)
log(f"Renamed sidecar: {old_tags_path.name}{new_tags_path.name}", file=sys.stderr)
log(
f"Renamed sidecar: {old_tags_path.name}{new_tags_path.name}", file=sys.stderr
)
return new_path
except Exception as exc:
@@ -528,7 +543,6 @@ def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]
return None
def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]]:
"""Fallback sidecar reader if metadata module unavailable.
@@ -579,7 +593,9 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]
return h, t, u
def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]) -> Path:
def _write_sidecar(
p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]
) -> Path:
"""Write tags to sidecar file and handle title-based renaming.
Returns the new media path if renamed, otherwise returns the original media path.
@@ -613,8 +629,14 @@ def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], ha
return media
def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional[str], extra: Optional[Dict[str, Any]] = None, store_label: Optional[str] = None) -> int:
def _emit_tag_payload(
source: str,
tags_list: List[str],
*,
hash_value: Optional[str],
extra: Optional[Dict[str, Any]] = None,
store_label: Optional[str] = None,
) -> int:
"""Emit tag values as structured payload to pipeline."""
payload: Dict[str, Any] = {
"source": source,
@@ -640,11 +662,7 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional
if ctx.get_stage_context() is not None:
for idx, tag_name in enumerate(tags_list, start=1):
tag_item = TagItem(
tag_name=tag_name,
tag_index=idx,
hash=hash_value,
store=source,
service_name=None
tag_name=tag_name, tag_index=idx, hash=hash_value, store=source, service_name=None
)
ctx.emit(tag_item)
else:
@@ -654,34 +672,40 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional
return 0
def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]:
"""Extract scrapable identifiers from tags."""
identifiers = {}
scrapable_prefixes = {
'openlibrary', 'isbn', 'isbn_10', 'isbn_13',
'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb'
"openlibrary",
"isbn",
"isbn_10",
"isbn_13",
"musicbrainz",
"musicbrainzalbum",
"imdb",
"tmdb",
"tvdb",
}
for tag in tags_list:
if not isinstance(tag, str) or ':' not in tag:
if not isinstance(tag, str) or ":" not in tag:
continue
parts = tag.split(':', 1)
parts = tag.split(":", 1)
if len(parts) != 2:
continue
key_raw = parts[0].strip().lower()
key = key_raw.replace('-', '_')
if key == 'isbn10':
key = 'isbn_10'
elif key == 'isbn13':
key = 'isbn_13'
key = key_raw.replace("-", "_")
if key == "isbn10":
key = "isbn_10"
elif key == "isbn13":
key = "isbn_13"
value = parts[1].strip()
# Normalize ISBN values by removing hyphens for API friendliness
if key.startswith('isbn'):
value = value.replace('-', '')
if key.startswith("isbn"):
value = value.replace("-", "")
if key in scrapable_prefixes and value:
identifiers[key] = value
@@ -693,9 +717,9 @@ def _extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]:
"""Get first tag value for a namespace (e.g., artist:, title:)."""
ns = namespace.lower()
for tag in tags_list:
if not isinstance(tag, str) or ':' not in tag:
if not isinstance(tag, str) or ":" not in tag:
continue
prefix, _, value = tag.partition(':')
prefix, _, value = tag.partition(":")
if prefix.strip().lower() != ns:
continue
candidate = value.strip()
@@ -704,7 +728,9 @@ def _extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]:
return None
def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple[str, str]], List[Dict[str, Any]]]:
def _scrape_url_metadata(
url: str,
) -> Tuple[Optional[str], List[str], List[Tuple[str, str]], List[Dict[str, Any]]]:
"""Scrape metadata from a URL using yt-dlp.
Returns:
@@ -730,9 +756,11 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
"yt-dlp",
"-j", # Output JSON
"--no-warnings",
"--playlist-items", "1-10", # Get first 10 items if it's a playlist (provides entries)
"-f", "best",
url
"--playlist-items",
"1-10", # Get first 10 items if it's a playlist (provides entries)
"-f",
"best",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
@@ -743,7 +771,7 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
# Parse JSON output - WITHOUT --flat-playlist, we get ONE JSON object with 'entries' array
# This gives us full metadata instead of flat format
lines = result.stdout.strip().split('\n')
lines = result.stdout.strip().split("\n")
if not lines or not lines[0]:
log("yt-dlp returned empty output", file=sys.stderr)
return None, [], [], []
@@ -756,7 +784,7 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
return None, [], [], []
# Extract title - use the main title
title = data.get('title', 'Unknown')
title = data.get("title", "Unknown")
# Determine if this is a playlist/album (has entries array)
# is_playlist = 'entries' in data and isinstance(data.get('entries'), list)
@@ -772,20 +800,22 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
tags.extend(album_tags)
# Case 1: Entries are nested in the main object (standard playlist structure)
if 'entries' in data and isinstance(data.get('entries'), list):
entries = data['entries']
if "entries" in data and isinstance(data.get("entries"), list):
entries = data["entries"]
# Build playlist items with title and duration
for idx, entry in enumerate(entries, 1):
if isinstance(entry, dict):
item_title = entry.get('title', entry.get('id', f'Track {idx}'))
item_duration = entry.get('duration', 0)
playlist_items.append({
'index': idx,
'id': entry.get('id', f'track_{idx}'),
'title': item_title,
'duration': item_duration,
'url': entry.get('url') or entry.get('webpage_url', ''),
})
item_title = entry.get("title", entry.get("id", f"Track {idx}"))
item_duration = entry.get("duration", 0)
playlist_items.append(
{
"index": idx,
"id": entry.get("id", f"track_{idx}"),
"title": item_title,
"duration": item_duration,
"url": entry.get("url") or entry.get("webpage_url", ""),
}
)
# Extract tags from each entry and merge (but don't duplicate album-level tags)
# Only merge entry tags that are multi-value prefixes (not single-value like title:, artist:, etc.)
@@ -793,18 +823,29 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
entry_tags = extract_ytdlp_tags(entry)
# Single-value namespaces that should not be duplicated from entries
single_value_namespaces = {'title', 'artist', 'album', 'creator', 'channel', 'release_date', 'upload_date', 'license', 'location'}
single_value_namespaces = {
"title",
"artist",
"album",
"creator",
"channel",
"release_date",
"upload_date",
"license",
"location",
}
for tag in entry_tags:
# Extract the namespace (part before the colon)
tag_namespace = tag.split(':', 1)[0].lower() if ':' in tag else None
tag_namespace = tag.split(":", 1)[0].lower() if ":" in tag else None
# Skip if this namespace already exists in tags (from album level)
if tag_namespace and tag_namespace in single_value_namespaces:
# Check if any tag with this namespace already exists in tags
already_has_namespace = any(
t.split(':', 1)[0].lower() == tag_namespace
for t in tags if ':' in t
t.split(":", 1)[0].lower() == tag_namespace
for t in tags
if ":" in t
)
if already_has_namespace:
continue # Skip this tag, keep the album-level one
@@ -814,53 +855,48 @@ def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple
# Case 2: Playlist detected by playlist_count field (BandCamp albums, etc.)
# These need a separate call with --flat-playlist to get the actual entries
elif (data.get('playlist_count') or 0) > 0 and 'entries' not in data:
elif (data.get("playlist_count") or 0) > 0 and "entries" not in data:
try:
# Make a second call with --flat-playlist to get the actual tracks
flat_cmd = [
"yt-dlp",
"-j",
"--no-warnings",
"--flat-playlist",
"-f", "best",
url
]
flat_cmd = ["yt-dlp", "-j", "--no-warnings", "--flat-playlist", "-f", "best", url]
flat_result = subprocess.run(flat_cmd, capture_output=True, text=True, timeout=30)
if flat_result.returncode == 0:
flat_lines = flat_result.stdout.strip().split('\n')
flat_lines = flat_result.stdout.strip().split("\n")
# With --flat-playlist, each line is a separate track JSON object
# (not nested in a playlist container), so process ALL lines
for idx, line in enumerate(flat_lines, 1):
if line.strip().startswith('{'):
if line.strip().startswith("{"):
try:
entry = json_module.loads(line)
item_title = entry.get('title', entry.get('id', f'Track {idx}'))
item_duration = entry.get('duration', 0)
playlist_items.append({
'index': idx,
'id': entry.get('id', f'track_{idx}'),
'title': item_title,
'duration': item_duration,
'url': entry.get('url') or entry.get('webpage_url', ''),
})
item_title = entry.get("title", entry.get("id", f"Track {idx}"))
item_duration = entry.get("duration", 0)
playlist_items.append(
{
"index": idx,
"id": entry.get("id", f"track_{idx}"),
"title": item_title,
"duration": item_duration,
"url": entry.get("url") or entry.get("webpage_url", ""),
}
)
except json_module.JSONDecodeError:
pass
except Exception as e:
pass # Silently ignore if we can't get playlist entries
# Fallback: if still no tags detected, get from first item
if not tags and extract_ytdlp_tags:
tags = extract_ytdlp_tags(data)
# Extract formats from the main data object
formats = []
if 'formats' in data:
formats = _extract_url_formats(data.get('formats', []))
if "formats" in data:
formats = _extract_url_formats(data.get("formats", []))
# Deduplicate tags by namespace to prevent duplicate title:, artist:, etc.
try:
from metadata import dedup_tags_by_namespace as _dedup
if _dedup:
tags = _dedup(tags, keep_first=True)
except Exception:
@@ -886,47 +922,49 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:
audio_formats = {} # {quality_label: format_data}
for fmt in formats:
vcodec = fmt.get('vcodec', 'none')
acodec = fmt.get('acodec', 'none')
height = fmt.get('height')
ext = fmt.get('ext', 'unknown')
format_id = fmt.get('format_id', '')
tbr = fmt.get('tbr', 0)
abr = fmt.get('abr', 0)
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
height = fmt.get("height")
ext = fmt.get("ext", "unknown")
format_id = fmt.get("format_id", "")
tbr = fmt.get("tbr", 0)
abr = fmt.get("abr", 0)
# Video format
if vcodec and vcodec != 'none' and height:
if vcodec and vcodec != "none" and height:
if height < 480:
continue
res_key = f"{height}p"
if res_key not in video_formats or tbr > video_formats[res_key].get('tbr', 0):
if res_key not in video_formats or tbr > video_formats[res_key].get("tbr", 0):
video_formats[res_key] = {
'label': f"{height}p ({ext})",
'format_id': format_id,
'tbr': tbr,
"label": f"{height}p ({ext})",
"format_id": format_id,
"tbr": tbr,
}
# Audio-only format
elif acodec and acodec != 'none' and (not vcodec or vcodec == 'none'):
elif acodec and acodec != "none" and (not vcodec or vcodec == "none"):
audio_key = f"audio_{abr}"
if audio_key not in audio_formats or abr > audio_formats[audio_key].get('abr', 0):
if audio_key not in audio_formats or abr > audio_formats[audio_key].get("abr", 0):
audio_formats[audio_key] = {
'label': f"audio ({ext})",
'format_id': format_id,
'abr': abr,
"label": f"audio ({ext})",
"format_id": format_id,
"abr": abr,
}
result = []
# Add video formats in descending resolution order
for res in sorted(video_formats.keys(), key=lambda x: int(x.replace('p', '')), reverse=True):
for res in sorted(
video_formats.keys(), key=lambda x: int(x.replace("p", "")), reverse=True
):
fmt = video_formats[res]
result.append((fmt['label'], fmt['format_id']))
result.append((fmt["label"], fmt["format_id"]))
# Add best audio format
if audio_formats:
best_audio = max(audio_formats.values(), key=lambda x: x.get('abr', 0))
result.append((best_audio['label'], best_audio['format_id']))
best_audio = max(audio_formats.values(), key=lambda x: x.get("abr", 0))
result.append((best_audio["label"], best_audio["format_id"]))
return result
@@ -975,13 +1013,13 @@ def _perform_scraping(tags_list: List[str]) -> List[str]:
new_tags = []
# Prefer OpenLibrary over ISBN (more complete metadata)
if 'openlibrary' in identifiers:
olid = identifiers['openlibrary']
if "openlibrary" in identifiers:
olid = identifiers["openlibrary"]
if olid:
log(f"Scraping OpenLibrary: {olid}")
new_tags.extend(_scrape_openlibrary_metadata(olid))
elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers:
isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn')
elif "isbn_13" in identifiers or "isbn_10" in identifiers or "isbn" in identifiers:
isbn = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn")
if isbn:
log(f"Scraping ISBN: {isbn}")
new_tags.extend(_scrape_isbn_metadata(isbn))
@@ -1028,11 +1066,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if 0 <= idx < len(items_pool):
result = items_pool[idx]
args_list = []
debug(f"[get_tag] Resolved numeric selection arg {token} -> last_result_items[{idx}]")
debug(
f"[get_tag] Resolved numeric selection arg {token} -> last_result_items[{idx}]"
)
else:
debug(f"[get_tag] Numeric selection arg {token} out of range (items={len(items_pool)})")
debug(
f"[get_tag] Numeric selection arg {token} out of range (items={len(items_pool)})"
)
except Exception as exc:
debug(f"[get_tag] Failed to resolve numeric selection arg {token}: {exc}")
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
@@ -1078,23 +1121,41 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
#
# NOTE: We intentionally do not reuse _scrape_url_metadata() here because it
# performs namespace deduplication that would collapse multi-valued tags.
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
file_hash = normalize_hash(hash_override) or normalize_hash(
get_field(result, "hash", None)
)
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
subject_path = (
get_field(result, "path", None)
or get_field(result, "target", None)
or get_field(result, "filename", None)
)
item_title = (
get_field(result, "title", None)
or get_field(result, "name", None)
or get_field(result, "filename", None)
)
# Only run overwrite-apply when the item is store-backed.
# If this is a URL-only PipeObject, fall through to provider mode below.
if file_hash and store_name and str(file_hash).strip().lower() != "unknown" and str(store_name).strip().upper() not in {"PATH", "URL"}:
if (
file_hash
and store_name
and str(file_hash).strip().lower() != "unknown"
and str(store_name).strip().upper() not in {"PATH", "URL"}
):
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
except Exception as exc:
log(f"Failed to resolve store backend '{store_name}': {exc}", file=sys.stderr)
return 1
candidate_urls = _resolve_candidate_urls_for_item(result, backend, file_hash, config)
candidate_urls = _resolve_candidate_urls_for_item(
result, backend, file_hash, config
)
scrape_target = _pick_supported_ytdlp_url(candidate_urls)
if not scrape_target:
log(
@@ -1109,7 +1170,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
info = _scrape_ytdlp_info(scrape_target)
if not info:
log("yt-dlp could not extract metadata for this URL (unsupported or failed)", file=sys.stderr)
log(
"yt-dlp could not extract metadata for this URL (unsupported or failed)",
file=sys.stderr,
)
return 1
try:
@@ -1135,7 +1199,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Subtitle availability tags
try:
tags.extend(_extract_subtitle_tags(info_for_subs if isinstance(info_for_subs, dict) else {}))
tags.extend(
_extract_subtitle_tags(
info_for_subs if isinstance(info_for_subs, dict) else {}
)
)
except Exception:
pass
@@ -1212,11 +1280,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# IMPORTANT: do not rely on `result.tag` for this because it can be stale (cached on
# the piped PipeObject). Always prefer the current store-backed tags when possible.
identifier_tags: List[str] = []
file_hash_for_scrape = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
file_hash_for_scrape = normalize_hash(hash_override) or normalize_hash(
get_field(result, "hash", None)
)
store_for_scrape = get_field(result, "store", None)
if file_hash_for_scrape and store_for_scrape:
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_for_scrape)]
current_tags, _src = backend.get_tag(file_hash_for_scrape, config=config)
@@ -1234,14 +1305,24 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# As a last resort, try local sidecar only when the item is not store-backed.
if not identifier_tags and (not file_hash_for_scrape or not store_for_scrape):
file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "filename", None)
if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")):
file_path = (
get_field(result, "target", None)
or get_field(result, "path", None)
or get_field(result, "filename", None)
)
if (
isinstance(file_path, str)
and file_path
and not file_path.lower().startswith(("http://", "https://"))
):
try:
media_path = Path(str(file_path))
if media_path.exists():
tags_from_sidecar = read_sidecar(media_path)
if isinstance(tags_from_sidecar, list):
identifier_tags = [str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))]
identifier_tags = [
str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))
]
except Exception:
pass
@@ -1252,17 +1333,30 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifier_query: Optional[str] = None
if identifiers:
if provider.name in {"openlibrary", "googlebooks", "google"}:
identifier_query = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn") or identifiers.get("openlibrary")
identifier_query = (
identifiers.get("isbn_13")
or identifiers.get("isbn_10")
or identifiers.get("isbn")
or identifiers.get("openlibrary")
)
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum")
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
"musicbrainzalbum"
)
# Determine query from identifier first, else title on the result or filename
title_hint = title_from_tags or get_field(result, "title", None) or get_field(result, "name", None)
title_hint = (
title_from_tags or get_field(result, "title", None) or get_field(result, "name", None)
)
if not title_hint:
file_path = get_field(result, "path", None) or get_field(result, "filename", None)
if file_path:
title_hint = Path(str(file_path)).stem
artist_hint = artist_from_tags or get_field(result, "artist", None) or get_field(result, "uploader", None)
artist_hint = (
artist_from_tags
or get_field(result, "artist", None)
or get_field(result, "uploader", None)
)
if not artist_hint:
meta_field = get_field(result, "metadata", None)
if isinstance(meta_field, dict):
@@ -1271,7 +1365,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
artist_hint = str(meta_artist)
combined_query: Optional[str] = None
if not identifier_query and title_hint and artist_hint and provider.name in {"itunes", "musicbrainz"}:
if (
not identifier_query
and title_hint
and artist_hint
and provider.name in {"itunes", "musicbrainz"}
):
if provider.name == "musicbrainz":
combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"'
else:
@@ -1280,7 +1379,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# yt-dlp isn't a search provider; it requires a URL.
url_hint: Optional[str] = None
if provider.name == "ytdlp":
raw_url = get_field(result, "url", None) or get_field(result, "source_url", None) or get_field(result, "target", None)
raw_url = (
get_field(result, "url", None)
or get_field(result, "source_url", None)
or get_field(result, "target", None)
)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url, str) and raw_url.strip().startswith(("http://", "https://")):
@@ -1325,14 +1428,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
from result_table import ResultTable
table = ResultTable(f"Metadata: {provider.name}")
table.set_source_command("get-tag", [])
selection_payload = []
hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
hash_for_payload = normalize_hash(hash_override) or normalize_hash(
get_field(result, "hash", None)
)
store_for_payload = get_field(result, "store", None)
# Preserve a consistent path field when present so selecting a metadata row
# keeps referring to the original file.
path_for_payload = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
path_for_payload = (
get_field(result, "path", None)
or get_field(result, "target", None)
or get_field(result, "filename", None)
)
for idx, item in enumerate(items):
tags = _filter_scraped_tags(provider.to_tags(item))
row = table.add_row()
@@ -1380,7 +1490,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if result_provider and isinstance(result_tags, list) and result_tags:
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
subject_path = (
get_field(result, "path", None)
or get_field(result, "target", None)
or get_field(result, "filename", None)
)
if not file_hash or not store_name:
log("Selected metadata row is missing hash/store; cannot apply tags", file=sys.stderr)
_emit_tags_as_table(
@@ -1401,10 +1515,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
else:
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
if not apply_tags:
log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
log(
"No applicable scraped tags to apply (title:/artist:/source: are skipped)",
file=sys.stderr,
)
return 0
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
ok = bool(backend.add_tag(file_hash, apply_tags, config=config))
@@ -1428,7 +1546,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
store=str(store_name),
service_name=None,
config=config,
item_title=str(get_field(result, "title", None) or get_field(result, "name", None) or str(result_provider)),
item_title=str(
get_field(result, "title", None)
or get_field(result, "name", None)
or str(result_provider)
),
path=str(subject_path) if subject_path else None,
subject={
"hash": file_hash,
@@ -1445,7 +1567,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline
# This allows interactive REPL to work even in pipelines
emit_mode = emit_requested or bool(store_key)
store_label = (store_key.strip() if store_key and store_key.strip() else None)
store_label = store_key.strip() if store_key and store_key.strip() else None
# Get hash and store from result
store_name = get_field(result, "store")
@@ -1461,6 +1583,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Get tags using storage backend
try:
from Store import Store
storage = Store(config)
backend = storage[store_name]
current, source = backend.get_tag(file_hash, config=config)
@@ -1479,11 +1602,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Always output to ResultTable (pipeline mode only)
# Extract title for table header
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
item_title = (
get_field(result, "title", None)
or get_field(result, "name", None)
or get_field(result, "filename", None)
)
# Build a subject payload representing the file whose tags are being shown
subject_store = get_field(result, "store", None) or store_name
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
subject_path = (
get_field(result, "path", None)
or get_field(result, "target", None)
or get_field(result, "filename", None)
)
subject_payload: Dict[str, Any] = {
"tag": list(current),
"title": item_title,
@@ -1540,7 +1671,7 @@ class Get_Tag(Cmdlet):
super().__init__(
name="get-tag",
summary="Get tag values from Hydrus or local sidecar metadata",
usage="get-tag [-query \"hash:<sha256>\"] [--store <key>] [--emit] [-scrape <url|provider>]",
usage='get-tag [-query "hash:<sha256>"] [--store <key>] [--emit] [-scrape <url|provider>]',
alias=[],
arg=[
SharedArgs.QUERY,
@@ -1548,13 +1679,13 @@ class Get_Tag(Cmdlet):
name="-store",
type="string",
description="Store result to this key for pipeline",
alias="store"
alias="store",
),
CmdletArg(
name="-emit",
type="flag",
description="Emit result without interactive prompt (quiet mode)",
alias="emit-only"
alias="emit-only",
),
CmdletArg(
name="-scrape",
@@ -1562,14 +1693,14 @@ class Get_Tag(Cmdlet):
description="Scrape metadata from URL/provider, or use 'ytdlp' to scrape from the item's URL and overwrite tags",
required=False,
choices=_SCRAPE_CHOICES,
)
),
],
detail=[
"- Retrieves tags for a file from:",
" Hydrus: Using file hash if available",
" Local: From sidecar files or local library database",
"- Options:",
" -query: Override hash to look up in Hydrus (use: -query \"hash:<sha256>\")",
' -query: Override hash to look up in Hydrus (use: -query "hash:<sha256>")',
" -store: Store result to key for downstream pipeline",
" -emit: Quiet mode (no interactive selection)",
" -scrape: Scrape metadata from URL or metadata provider",
@@ -1585,5 +1716,3 @@ class Get_Tag(Cmdlet):
# Create and register the cmdlet
CMDLET = Get_Tag()
+171 -13
View File
@@ -1,8 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Sequence
from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
import sys
import re
from fnmatch import fnmatch
from urllib.parse import urlparse
import pipeline as ctx
from . import _shared as sh
@@ -26,28 +29,176 @@ class UrlItem:
class Get_Url(Cmdlet):
"""Get url associated with files via hash+store."""
"""Get url associated with files via hash+store, or search urls by pattern."""
def __init__(self) -> None:
super().__init__(
name="get-url",
summary="List url associated with a file",
usage="@1 | get-url",
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
],
summary="List url associated with a file, or search urls by pattern",
usage='@1 | get-url OR get-url -url "https://www.youtube.com/watch?v=xx"',
arg=[SharedArgs.QUERY, SharedArgs.STORE, SharedArgs.URL],
detail=[
"- Lists all url associated with file identified by hash+store",
"- Get url for file: @1 | get-url (requires hash+store from result)",
'- Search url across stores: get-url -url "www.google.com" (strips protocol & www prefix)',
'- Wildcard matching: get-url -url "youtube.com*" (matches all youtube.com urls)',
"- Pattern matching: domain matching ignores protocol (https://, http://, ftp://)",
],
exec=self.run,
)
self.register()
@staticmethod
def _normalize_url_for_search(url: str) -> str:
"""Strip protocol and www prefix from URL for searching.
Examples:
https://www.youtube.com/watch?v=xx -> youtube.com/watch?v=xx
http://www.google.com -> google.com
ftp://files.example.com -> files.example.com
"""
url = str(url or "").strip()
# Remove protocol (http://, https://, ftp://, etc.)
url = re.sub(r"^[a-z][a-z0-9+.-]*://", "", url, flags=re.IGNORECASE)
# Remove www. prefix (case-insensitive)
url = re.sub(r"^www\.", "", url, flags=re.IGNORECASE)
return url.lower()
@staticmethod
def _match_url_pattern(url: str, pattern: str) -> bool:
    """Tell whether ``url`` matches ``pattern`` once both are normalized.

    Both arguments go through ``Get_Url._normalize_url_for_search`` (protocol
    and ``www.`` removed, lowercased) and are then compared with
    ``fnmatch``, so ``*`` and ``?`` act as wildcards.  The whole normalized
    URL must match: a bare domain pattern does not match a URL with a path
    unless the pattern ends in ``*``.
    """
    normalize = Get_Url._normalize_url_for_search
    candidate = normalize(url)
    glob = normalize(pattern)
    return fnmatch(candidate, glob)
def _search_urls_across_stores(
    self, pattern: str, config: Dict[str, Any]
) -> Tuple[List[UrlItem], List[str]]:
    """Search for URLs matching ``pattern`` across all configured stores.

    For each backend reported by ``Store(config).list_backends()`` the method
    requests candidate files with ``backend.search("*", limit=1000)``, looks
    up each file's URLs with ``backend.get_url(hash)`` and keeps those that
    satisfy ``_match_url_pattern``.

    The search is best-effort: a backend that cannot be resolved or searched,
    or a file whose URLs cannot be read, is skipped.  Unlike a bare
    ``except: pass``, every skipped failure is reported through ``debug`` so
    that "nothing matched" can be told apart from "the lookup failed".

    Args:
        pattern: URL pattern (``*``/``?`` wildcards allowed).
        config: Configuration mapping handed to ``Store``.

    Returns:
        Tuple of (matching_items, found_stores), where ``found_stores`` is the
        sorted list of store names that produced at least one match.  On an
        unexpected top-level error the items gathered so far are returned
        together with an empty store list.
    """
    items: List[UrlItem] = []
    found_stores: Set[str] = set()

    try:
        storage = Store(config)
        store_names = storage.list_backends() if hasattr(storage, "list_backends") else []

        if not store_names:
            log("Error: No stores configured", file=sys.stderr)
            return items, []

        for store_name in store_names:
            try:
                backend = storage[store_name]
            except KeyError:
                # Listed but not resolvable: nothing to search here.
                continue
            except Exception as exc:
                debug(f"Error searching store '{store_name}': {exc}", file=sys.stderr)
                continue

            # Limitation: there is no "list all files" API, so a broad search
            # (capped at 1000 results) is used to find candidate files.
            try:
                search_results = backend.search("*", limit=1000)
                if not search_results:
                    continue

                for result in search_results:
                    file_hash = result.get("hash") or result.get("file_hash")
                    if not file_hash:
                        continue

                    try:
                        urls = backend.get_url(file_hash)
                        for url in urls or []:
                            if not self._match_url_pattern(str(url), pattern):
                                continue
                            items.append(
                                UrlItem(
                                    url=str(url),
                                    hash=file_hash,
                                    store=store_name,
                                )
                            )
                            found_stores.add(store_name)
                    except Exception as exc:
                        # One unreadable file must not abort the whole store.
                        debug(
                            f"Error reading urls for '{file_hash}' in store '{store_name}': {exc}",
                            file=sys.stderr,
                        )
            except Exception as exc:
                # Backend might not support search; skip it but leave a trace.
                debug(f"Error searching store '{store_name}': {exc}", file=sys.stderr)

        # Sorted so the result does not depend on set iteration order.
        return items, sorted(found_stores, key=str)

    except Exception as exc:
        log(f"Error searching stores: {exc}", file=sys.stderr)
        return items, []
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
"""Get url for file via hash+store, or search urls by pattern."""
parsed = parse_cmdlet_args(args, self)
# Check if user provided a URL pattern to search for
search_pattern = parsed.get("url")
if search_pattern:
# URL search mode: find all files with matching URLs across stores
items, stores_searched = self._search_urls_across_stores(search_pattern, config)
if not items:
log(f"No urls matching pattern: {search_pattern}", file=sys.stderr)
return 1
# Create result table
from result_table import ResultTable
table = (
ResultTable("URL Search Results", max_columns=3)
.set_preserve_order(True)
.set_table("urls")
.set_value_case("preserve")
)
table.set_source_command("get-url", ["-url", search_pattern])
# Group by store for display
by_store: Dict[str, List[UrlItem]] = {}
for item in items:
if item.store not in by_store:
by_store[item.store] = []
by_store[item.store].append(item)
# Add rows grouped by store
for store_name in sorted(by_store.keys()):
store_items = by_store[store_name]
for idx, item in enumerate(store_items):
row = table.add_row()
if idx == 0:
row.add_column("Store", store_name)
else:
row.add_column("Store", "")
row.add_column("Url", item.url)
# Normalize for display
normalized = self._normalize_url_for_search(item.url)
row.add_column("Hash", item.hash[:16]) # Show first 16 chars
ctx.emit(item)
ctx.set_last_result_table(table if items else None, items, subject=result)
log(f"Found {len(items)} matching url(s) in {len(stores_searched)} store(s)")
return 0
# Original mode: Get URLs for a specific file by hash+store
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
@@ -58,7 +209,7 @@ class Get_Url(Cmdlet):
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
@@ -120,6 +271,13 @@ class Get_Url(Cmdlet):
return 1
# Use the project's debug logger when it can be imported; otherwise install a
# silent stand-in so that callers in this module can call debug() unconditionally.
try:
    from SYS.logger import debug
except ImportError:

    def debug(*args, **kwargs):
        """Fallback no-op: accept any arguments and do nothing."""
        return None
CMDLET = Get_Url()
+244 -144
View File
@@ -1,4 +1,5 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
@@ -29,6 +30,7 @@ import pipeline as ctx
try:
from pypdf import PdfWriter, PdfReader
HAS_PYPDF = True
except ImportError:
HAS_PYPDF = False
@@ -40,6 +42,7 @@ try:
read_tags_from_file,
merge_multiple_tag_lists,
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
@@ -59,11 +62,11 @@ except ImportError:
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
return tags
def merge_multiple_tag_lists(sources: List[List[str]], strategy: str = 'first') -> List[str]:
def merge_multiple_tag_lists(sources: List[List[str]], strategy: str = "first") -> List[str]:
out: List[str] = []
seen: set[str] = set()
for src in sources:
for t in (src or []):
for t in src or []:
s = str(t)
if s and s not in seen:
out.append(s)
@@ -163,7 +166,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
cookiefile = None
fmts = list_formats(sample_url, no_playlist=False, playlist_items=None, cookiefile=cookiefile)
fmts = list_formats(
sample_url, no_playlist=False, playlist_items=None, cookiefile=cookiefile
)
if isinstance(fmts, list) and fmts:
has_video = False
for f in fmts:
@@ -228,16 +233,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Track tags carried in the piped items (e.g. add-tag stage) so they survive merge.
try:
raw_tags = get_field(item, 'tag', [])
raw_tags = get_field(item, "tag", [])
if isinstance(raw_tags, str) and raw_tags.strip():
source_item_tag_lists.append([raw_tags.strip()])
elif isinstance(raw_tags, list):
source_item_tag_lists.append([str(t) for t in raw_tags if t is not None and str(t).strip()])
source_item_tag_lists.append(
[str(t) for t in raw_tags if t is not None and str(t).strip()]
)
except Exception:
pass
# Track tags from the .tag sidecar for this source (if present)
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
tags_file = target_path.with_suffix(target_path.suffix + ".tag")
if tags_file.exists() and HAS_METADATA_API:
try:
source_tags.extend(read_tags_from_file(tags_file) or [])
@@ -250,13 +257,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_hashes.append(str(hash_value))
# Extract known url if available
url = get_field(item, 'url', [])
url = get_field(item, "url", [])
if isinstance(url, str):
source_url.append(url)
elif isinstance(url, list):
source_url.extend(url)
else:
title = get_field(item, 'title', 'unknown') or get_field(item, 'id', 'unknown')
title = get_field(item, "title", "unknown") or get_field(item, "id", "unknown")
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
if len(source_files) < 2:
@@ -267,41 +274,53 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
file_types = set()
for f in source_files:
suffix = f.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_types.add('audio')
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_types.add('video')
elif suffix in {'.pdf'}:
file_types.add('pdf')
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
file_types.add('text')
if suffix in {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".mka"}:
file_types.add("audio")
elif suffix in {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
}:
file_types.add("video")
elif suffix in {".pdf"}:
file_types.add("pdf")
elif suffix in {".txt", ".srt", ".vtt", ".md", ".log"}:
file_types.add("text")
else:
file_types.add('other')
file_types.add("other")
if len(file_types) > 1 and 'other' not in file_types:
if len(file_types) > 1 and "other" not in file_types:
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
log(f"Can only merge files of the same type", file=sys.stderr)
return 1
file_kind = list(file_types)[0] if file_types else 'other'
file_kind = list(file_types)[0] if file_types else "other"
# Determine output format
output_format = format_spec or 'auto'
if output_format == 'auto':
if file_kind == 'audio':
output_format = 'mka' # Default audio codec - mka supports chapters and stream copy
elif file_kind == 'video':
output_format = 'mp4' # Default video codec
elif file_kind == 'pdf':
output_format = 'pdf'
output_format = format_spec or "auto"
if output_format == "auto":
if file_kind == "audio":
output_format = "mka" # Default audio codec - mka supports chapters and stream copy
elif file_kind == "video":
output_format = "mp4" # Default video codec
elif file_kind == "pdf":
output_format = "pdf"
else:
output_format = 'txt'
output_format = "txt"
# Determine output path
if output_override:
if output_override.is_dir():
base_title = get_field(files_to_merge[0], 'title', 'merged')
base_name = _sanitize_name(str(base_title or 'merged'))
base_title = get_field(files_to_merge[0], "title", "merged")
base_name = _sanitize_name(str(base_title or "merged"))
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
else:
output_path = output_override
@@ -311,19 +330,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
base_dir = resolve_output_dir(config)
except Exception:
base_dir = first_file.parent
output_path = Path(base_dir) / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
output_path = (
Path(base_dir) / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
)
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
# Perform merge based on file type
if file_kind == 'audio':
if file_kind == "audio":
success = _merge_audio(source_files, output_path, output_format)
elif file_kind == 'video':
elif file_kind == "video":
success = _merge_video(source_files, output_path, output_format)
elif file_kind == 'pdf':
elif file_kind == "pdf":
success = _merge_pdf(source_files, output_path)
elif file_kind == 'text':
elif file_kind == "text":
success = _merge_text(source_files, output_path)
else:
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
@@ -341,8 +362,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
s = str(t)
except Exception:
continue
if s.lower().startswith('title:'):
val = s.split(':', 1)[1].strip()
if s.lower().startswith("title:"):
val = s.split(":", 1)[1].strip()
return val or None
return None
@@ -352,7 +373,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
shared_title: Optional[str] = None
try:
if source_item_tag_lists:
per_item_titles: List[Optional[str]] = [_title_value_from_tags(tl) for tl in source_item_tag_lists]
per_item_titles: List[Optional[str]] = [
_title_value_from_tags(tl) for tl in source_item_tag_lists
]
non_empty = [t for t in per_item_titles if t]
if non_empty:
candidate = non_empty[0]
@@ -369,15 +392,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# - in-memory PipeObject tags (from add-tag etc)
# - .tag sidecars (if present)
# Keep all unique plain tags, and keep the first value for namespaced tags.
merged_tags = merge_multiple_tag_lists(source_item_tag_lists + ([source_tags] if source_tags else []), strategy='combine')
merged_tags = merge_multiple_tag_lists(
source_item_tag_lists + ([source_tags] if source_tags else []), strategy="combine"
)
# Ensure we always have a title tag (and make sure it's the chosen title)
merged_tags = [t for t in merged_tags if not str(t).lower().startswith('title:')]
merged_tags = [t for t in merged_tags if not str(t).lower().startswith("title:")]
merged_tags.insert(0, f"title:{merged_title}")
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
try:
from SYS.utils import sha256_file
merged_hash = sha256_file(output_path)
merged_item = create_pipe_object_result(
source="local",
@@ -405,7 +431,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for f in source_files:
try:
# Delete sidecar tags for the source (if any)
tag_file = f.with_suffix(f.suffix + '.tag')
tag_file = f.with_suffix(f.suffix + ".tag")
if tag_file.exists():
try:
tag_file.unlink()
@@ -436,28 +462,29 @@ def _sanitize_name(text: str) -> str:
def _ext_for_format(fmt: str) -> str:
"""Get file extension for format."""
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'm4b': 'm4b',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
'mkv': 'mkv',
'mp4': 'mp4',
'webm': 'webm',
'pdf': 'pdf',
'txt': 'txt',
'auto': 'mka', # Default - MKA for chapters
"mp3": "mp3",
"m4a": "m4a",
"m4b": "m4b",
"aac": "aac",
"opus": "opus",
"mka": "mka", # Matroska Audio - EXCELLENT chapter support (recommended)
"mkv": "mkv",
"mp4": "mp4",
"webm": "webm",
"pdf": "pdf",
"txt": "txt",
"auto": "mka", # Default - MKA for chapters
}
return format_map.get(fmt.lower(), 'mka')
return format_map.get(fmt.lower(), "mka")
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge audio files with chapters based on file boundaries."""
import logging
logger = logging.getLogger(__name__)
ffmpeg_path = _shutil.which('ffmpeg')
ffmpeg_path = _shutil.which("ffmpeg")
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
@@ -474,20 +501,31 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
# Get duration using ffprobe
try:
ffprobe_cmd = [
'ffprobe', '-v', 'error', '-show_entries',
'format=duration', '-print_format',
'default=noprint_wrappers=1:nokey=1', str(file_path)
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-print_format",
"default=noprint_wrappers=1:nokey=1",
str(file_path),
]
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
probe_result = _subprocess.run(
ffprobe_cmd, capture_output=True, text=True, timeout=10
)
if probe_result.returncode == 0 and probe_result.stdout.strip():
try:
duration_sec = float(probe_result.stdout.strip())
except ValueError:
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
logger.warning(
f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}"
)
duration_sec = 0
else:
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
logger.warning(
f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}"
)
duration_sec = 0
except Exception as e:
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
@@ -498,15 +536,15 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
if HAS_METADATA_API:
try:
# Try to read tags from .tag sidecar file
tags_file = file_path.with_suffix(file_path.suffix + '.tag')
tags_file = file_path.with_suffix(file_path.suffix + ".tag")
if tags_file.exists():
tags = read_tags_from_file(tags_file)
if tags:
# Look for title: tag
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith('title:'):
if isinstance(tag, str) and tag.lower().startswith("title:"):
# Extract the title value after the colon
title = tag.split(':', 1)[1].strip()
title = tag.split(":", 1)[1].strip()
break
except Exception as e:
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
@@ -518,14 +556,18 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
seconds = int((current_time_ms % 60000) // 1000)
millis = int(current_time_ms % 1000)
chapters.append({
'time_ms': current_time_ms,
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
'title': title,
'duration_sec': duration_sec
})
chapters.append(
{
"time_ms": current_time_ms,
"time_str": f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
"title": title,
"duration_sec": duration_sec,
}
)
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
logger.info(
f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)"
)
current_time_ms += int(duration_sec * 1000)
# If these came from a playlist/album, titles often look like:
@@ -537,13 +579,13 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
stripped_titles: List[str] = []
all_match = True
for ch in chapters:
raw_title = str(ch.get('title') or '').strip()
raw_title = str(ch.get("title") or "").strip()
m = split_re.match(raw_title)
if not m:
all_match = False
break
prefix = m.group('prefix').strip()
chapter_title = m.group('chapter').strip()
prefix = m.group("prefix").strip()
chapter_title = m.group("chapter").strip()
if not prefix or not chapter_title:
all_match = False
break
@@ -552,8 +594,10 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
if all_match and prefixes and len(set(prefixes)) == 1:
for idx, ch in enumerate(chapters):
ch['title'] = stripped_titles[idx]
logger.info(f"[merge-file] Stripped common title prefix for chapters: {prefixes[0]}")
ch["title"] = stripped_titles[idx]
logger.info(
f"[merge-file] Stripped common title prefix for chapters: {prefixes[0]}"
)
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
@@ -563,57 +607,57 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
concat_file.write_text("\n".join(concat_lines), encoding="utf-8")
# Step 3: Create FFmpeg metadata file with chapters
metadata_file = output.parent / f".metadata_{output.stem}.txt"
metadata_lines = [';FFMETADATA1']
metadata_lines = [";FFMETADATA1"]
for i, chapter in enumerate(chapters):
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
metadata_lines.append('[CHAPTER]')
metadata_lines.append('TIMEBASE=1/1000')
metadata_lines.append("[CHAPTER]")
metadata_lines.append("TIMEBASE=1/1000")
metadata_lines.append(f'START={chapter["time_ms"]}')
# Calculate end time (start of next chapter or end of file)
if i < len(chapters) - 1:
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
else:
metadata_lines.append(f'END={current_time_ms}')
metadata_lines.append(f"END={current_time_ms}")
metadata_lines.append(f'title={chapter["title"]}')
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
metadata_file.write_text("\n".join(metadata_lines), encoding="utf-8")
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
logger.info(f"[merge-file] Created {len(chapters)} chapters")
# Step 4: Build FFmpeg command to merge and embed chapters
# Strategy: First merge audio, then add metadata in separate pass
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
cmd = [ffmpeg_path, "-y", "-f", "concat", "-safe", "0", "-i", str(concat_file)]
# Add threading options for speed
cmd.extend(['-threads', '0']) # Use all available threads
cmd.extend(["-threads", "0"]) # Use all available threads
# Audio codec selection for first input
if output_format == 'mp3':
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
elif output_format in {'m4a', 'm4b'}:
if output_format == "mp3":
cmd.extend(["-c:a", "libmp3lame", "-q:a", "2"])
elif output_format in {"m4a", "m4b"}:
# Use copy if possible (much faster), otherwise re-encode
# Check if inputs are already AAC/M4A to avoid re-encoding
# For now, default to copy if format matches, otherwise re-encode
# But since we are merging potentially different codecs, re-encoding is safer
# To speed up re-encoding, we can use a faster preset or hardware accel if available
cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
elif output_format == 'aac':
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'opus':
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
elif output_format == 'mka':
cmd.extend(["-c:a", "aac", "-b:a", "256k"]) # M4A with better quality
elif output_format == "aac":
cmd.extend(["-c:a", "aac", "-b:a", "192k"])
elif output_format == "opus":
cmd.extend(["-c:a", "libopus", "-b:a", "128k"])
elif output_format == "mka":
# FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
# If we want speed, copy is best. If we want compatibility, re-encode.
# Let's try copy first if inputs are same format, but that's hard to detect here.
# Defaulting to copy for MKA as it's a container that supports many codecs
cmd.extend(['-c:a', 'copy'])
cmd.extend(["-c:a", "copy"])
else:
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
cmd.extend(["-c:a", "copy"]) # Copy without re-encoding
# Add the output file
cmd.append(str(output))
@@ -631,8 +675,8 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
stdout=_subprocess.PIPE,
stderr=_subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace'
encoding="utf-8",
errors="replace",
)
# Monitor progress
@@ -659,7 +703,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
output.name,
int(current_sec * 1000), # Use ms as "bytes" for progress bar
int(total_duration_sec * 1000),
speed=0
speed=0,
)
else:
break
@@ -669,7 +713,9 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
if process.returncode != 0:
log(f"FFmpeg error: {stderr}", file=sys.stderr)
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
raise _subprocess.CalledProcessError(
process.returncode, cmd, output=stdout, stderr=stderr
)
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
@@ -680,7 +726,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
if output_format == 'mka' or output.suffix.lower() == '.mka':
if output_format == "mka" or output.suffix.lower() == ".mka":
# MKA/MKV format has native chapter support via FFMetadata
# Re-mux the file with chapters embedded (copy streams, no re-encode)
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
@@ -689,29 +735,42 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
temp_output = output.parent / f".temp_{output.stem}.mka"
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
mkvmerge_path = _shutil.which('mkvmerge')
mkvmerge_path = _shutil.which("mkvmerge")
if mkvmerge_path:
# mkvmerge is the best tool for embedding chapters in Matroska files
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
cmd2 = [
mkvmerge_path, '-o', str(temp_output),
'--chapters', str(metadata_file),
str(output)
mkvmerge_path,
"-o",
str(temp_output),
"--chapters",
str(metadata_file),
str(output),
]
else:
# Fallback to ffmpeg with proper chapter embedding for Matroska
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
log(
f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...",
file=sys.stderr,
)
# For Matroska files, the metadata must be provided via -f ffmetadata input
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
ffmpeg_path,
"-y",
"-i",
str(output), # Input: merged audio
"-i",
str(metadata_file), # Input: FFMetadata file
"-c:a",
"copy", # Copy audio without re-encoding
"-threads",
"0", # Use all threads
"-map",
"0", # Map all from first input
"-map_chapters",
"1", # Map CHAPTERS from second input (FFMetadata)
str(temp_output), # Output
]
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
@@ -724,13 +783,14 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
check=False,
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
@@ -738,7 +798,10 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.info(f"[merge-file] Chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
log(
f"Warning: Could not embed chapters, using merge without chapters",
file=sys.stderr,
)
try:
temp_output.unlink()
except Exception:
@@ -747,8 +810,11 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.warning(f"[merge-file] Chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']:
log(
f"Warning: Chapter embedding failed, using merge without chapters",
file=sys.stderr,
)
elif output_format in {"m4a", "m4b"} or output.suffix.lower() in [".m4a", ".m4b", ".mp4"]:
# MP4/M4A format has native chapter support via iTunes metadata atoms
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
@@ -758,15 +824,23 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
ffmpeg_path,
"-y",
"-i",
str(output), # Input: merged audio
"-i",
str(metadata_file), # Input: FFMetadata file
"-c:a",
"copy", # Copy audio without re-encoding
"-threads",
"0", # Use all threads
"-map",
"0", # Map all from first input
"-map_metadata",
"1", # Map metadata from second input (FFMetadata)
"-map_chapters",
"1", # Map CHAPTERS from second input (FFMetadata)
str(temp_output), # Output
]
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
@@ -779,13 +853,14 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
check=False,
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
@@ -793,7 +868,10 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.info(f"[merge-file] MP4 chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
log(
f"Warning: Could not embed chapters, using merge without chapters",
file=sys.stderr,
)
try:
temp_output.unlink()
except Exception:
@@ -802,7 +880,10 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
log(
f"Warning: MP4 chapter embedding failed, using merge without chapters",
file=sys.stderr,
)
else:
# For other formats, chapters would require external tools
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
@@ -828,7 +909,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge video files."""
ffmpeg_path = _shutil.which('ffmpeg')
ffmpeg_path = _shutil.which("ffmpeg")
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
@@ -841,18 +922,31 @@ def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
concat_file.write_text("\n".join(concat_lines), encoding="utf-8")
# Build FFmpeg command for video merge
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
cmd = [ffmpeg_path, "-y", "-f", "concat", "-safe", "0", "-i", str(concat_file)]
# Video codec selection
if output_format == 'mp4':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'mkv':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
if output_format == "mp4":
cmd.extend(
[
"-c:v",
"libx265",
"-preset",
"fast",
"-tag:v",
"hvc1",
"-c:a",
"aac",
"-b:a",
"192k",
]
)
elif output_format == "mkv":
cmd.extend(["-c:v", "libx265", "-preset", "fast", "-c:a", "aac", "-b:a", "192k"])
else:
cmd.extend(['-c', 'copy']) # Copy without re-encoding
cmd.extend(["-c", "copy"]) # Copy without re-encoding
cmd.append(str(output))
@@ -866,7 +960,7 @@ def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
pass
if result.returncode != 0:
stderr = (result.stderr or '').strip()
stderr = (result.stderr or "").strip()
log(f"FFmpeg error: {stderr}", file=sys.stderr)
return False
@@ -880,12 +974,12 @@ def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
def _merge_text(files: List[Path], output: Path) -> bool:
"""Merge text files."""
try:
with open(output, 'w', encoding='utf-8') as outf:
with open(output, "w", encoding="utf-8") as outf:
for i, f in enumerate(files):
if i > 0:
outf.write('\n---\n') # Separator between files
outf.write("\n---\n") # Separator between files
try:
content = f.read_text(encoding='utf-8', errors='replace')
content = f.read_text(encoding="utf-8", errors="replace")
outf.write(content)
except Exception as e:
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
@@ -916,7 +1010,7 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
return False
with open(output, 'wb') as outf:
with open(output, "wb") as outf:
writer.write(outf)
return True
@@ -925,14 +1019,20 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
log(f"PDF merge error: {e}", file=sys.stderr)
return False
CMDLET = Cmdlet(
name="merge-file",
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
arg=[
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
CmdletArg(
"-delete", type="flag", description="Delete source files after successful merge."
),
SharedArgs.PATH,
CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
CmdletArg(
"-format",
description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file.",
),
],
detail=[
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
+81 -38
View File
@@ -37,7 +37,6 @@ import pipeline as pipeline_context
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
@@ -104,7 +103,6 @@ SITE_SELECTORS: Dict[str, List[str]] = {
}
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@@ -146,6 +144,7 @@ class ScreenshotResult:
# Helper Functions
# ============================================================================
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
@@ -172,7 +171,11 @@ def _tags_from_url(url: str) -> List[str]:
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
host = (
str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "")
.strip()
.lower()
)
except Exception:
parsed = None
host = ""
@@ -300,7 +303,12 @@ def _convert_to_webp(
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
if (
downscale_if_oversize
and isinstance(max_dim, int)
and max_dim > 0
and (w > max_dim or h > max_dim)
):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
@@ -320,7 +328,9 @@ def _convert_to_webp(
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
debug(
f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}"
)
im.save(tmp_path, **save_kwargs)
@@ -332,6 +342,7 @@ def _convert_to_webp(
except Exception:
pass
def _matched_site_selectors(url: str) -> List[str]:
"""Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -355,7 +366,9 @@ def _selectors_for_url(url: str) -> List[str]:
return _matched_site_selectors(url)
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
def _platform_preprocess(
url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000
) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
try:
u = str(url or "").lower()
@@ -373,14 +386,16 @@ def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: i
return clicks
# Dismiss common cookie / consent prompts.
_try_click_buttons([
_try_click_buttons(
[
"Accept all",
"Accept",
"I agree",
"Agree",
"Allow all",
"OK",
])
]
)
# Some sites need small nudges (best-effort).
if "reddit.com" in u:
@@ -490,7 +505,9 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
return unique_path(path)
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
def _capture(
options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress
) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
try:
@@ -499,16 +516,24 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
try:
current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
current_browser = (
getattr(tool.defaults, "browser", "").lower()
if getattr(tool, "defaults", None) is not None
else ""
)
if current_browser != "chromium":
debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
debug(
f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet"
)
base_cfg = {}
try:
base_cfg = dict(getattr(tool, "_config", {}) or {})
except Exception:
base_cfg = {}
tool_block = dict(base_cfg.get("tool") or {}) if isinstance(base_cfg, dict) else {}
pw_block = dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
pw_block = (
dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
)
pw_block["browser"] = "chromium"
tool_block["playwright"] = pw_block
if isinstance(base_cfg, dict):
@@ -591,7 +616,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
for sel in selectors:
try:
debug(f"Trying selector: {sel}")
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
el = page.wait_for_selector(
sel, timeout=max(0, int(options.selector_timeout_ms))
)
except PlaywrightTimeoutError:
debug(f"Selector not found: {sel}")
continue
@@ -604,7 +631,10 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
pass
progress.step("capturing output")
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
el.screenshot(
path=str(destination),
type=("jpeg" if format_name == "jpeg" else None),
)
element_captured = True
debug("Element captured successfully")
break
@@ -645,8 +675,13 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
if any(k in msg for k in ["executable", "not found", "no such file", "cannot find", "install"]):
raise ScreenshotError("Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium") from exc
if any(
k in msg
for k in ["executable", "not found", "no such file", "cannot find", "install"]
):
raise ScreenshotError(
"Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium"
) from exc
raise
except ScreenshotError:
# Re-raise ScreenshotError raised intentionally (do not wrap)
@@ -666,7 +701,9 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
will_target = bool(options.prefer_platform_target) and requested_format != "pdf"
will_convert = requested_format == "webp"
will_archive = bool(options.archive and options.url)
total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
total_steps = (
9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
)
progress.begin_steps(total_steps)
progress.step("loading starting")
@@ -726,6 +763,7 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of url in the pipeline.
@@ -801,11 +839,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract url from piped results
if piped_results:
for item in piped_results:
url = (
get_field(item, 'path')
or get_field(item, 'url')
or get_field(item, 'target')
)
url = get_field(item, "path") or get_field(item, "url") or get_field(item, "target")
if url:
url_to_process.append((str(url), item))
@@ -883,7 +917,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
def _extract_item_tags(item: Any) -> List[str]:
if item is None:
return []
raw = get_field(item, 'tag')
raw = get_field(item, "tag")
if isinstance(raw, list):
return [str(t) for t in raw if t is not None and str(t).strip()]
if isinstance(raw, str) and raw.strip():
@@ -979,7 +1013,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
with open(screenshot_result.path, "rb") as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
@@ -987,7 +1021,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Create PipeObject result - marked as TEMP since derivative artifact
capture_date = ""
try:
capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat()
capture_date = (
datetime.fromtimestamp(screenshot_result.path.stat().st_mtime)
.date()
.isoformat()
)
except Exception:
capture_date = datetime.now().date().isoformat()
@@ -997,7 +1035,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
t
for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
@@ -1007,22 +1046,22 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
pipe_obj = create_pipe_object_result(
source='screenshot',
store='PATH',
source="screenshot",
store="PATH",
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
cmdlet_name="screen-shot",
title=display_title,
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
tag=merged_tags,
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
'url': screenshot_result.url,
'target': str(screenshot_result.path), # Explicit target for add-file
}
"source_url": url,
"archive_url": screenshot_result.archive_url,
"url": screenshot_result.url,
"target": str(screenshot_result.path), # Explicit target for add-file
},
)
# Emit the result so downstream cmdlet (like add-file) can use it
@@ -1038,6 +1077,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
@@ -1051,6 +1091,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
@@ -1058,16 +1100,17 @@ CMDLET = Cmdlet(
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,
CmdletArg(name="format", type="string", description="Output format: webp, png, jpeg, or pdf"),
CmdletArg(
name="format", type="string", description="Output format: webp, png, jpeg, or pdf"
),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.PATH
SharedArgs.PATH,
],
detail=[
"Uses Playwright Chromium engine only. Install Chromium with: python ./scripts/bootstrap.py --playwright-only --browsers chromium",
"PDF output requires headless Chromium (the cmdlet will enforce headless mode for PDF).",
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
]
],
)
CMDLET.exec = _run
+63 -22
View File
@@ -1,4 +1,5 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
@@ -25,6 +26,7 @@ try:
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
"""Search external content providers."""
@@ -34,28 +36,44 @@ class Search_Provider(Cmdlet):
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
CmdletArg(
"provider",
type="string",
required=True,
description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
),
CmdletArg(
"query",
type="string",
required=True,
description="Search query (supports provider-specific syntax)",
),
CmdletArg(
"limit", type="int", description="Maximum results to return (default: 50)"
),
CmdletArg(
"open",
type="int",
description="(alldebrid) Open folder/magnet by ID and list its files",
),
],
detail=[
"Search external content providers:",
"- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
" Example: search-provider -provider alldebrid \"*\"",
" Example: search-provider -provider alldebrid -open 123 \"*\"",
' Example: search-provider -provider alldebrid "*"',
' Example: search-provider -provider alldebrid -open 123 "*"',
"- bandcamp: Search for music albums/tracks",
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
' Example: search-provider -provider bandcamp "artist:altrusian grace"',
"- libgen: Search Library Genesis for books",
" Example: search-provider -provider libgen \"python programming\"",
' Example: search-provider -provider libgen "python programming"',
"- loc: Search Library of Congress (Chronicling America)",
" Example: search-provider -provider loc \"lincoln\"",
' Example: search-provider -provider loc "lincoln"',
"- soulseek: Search P2P network for music",
" Example: search-provider -provider soulseek \"pink floyd\"",
' Example: search-provider -provider soulseek "pink floyd"',
"- youtube: Search YouTube for videos",
" Example: search-provider -provider youtube \"tutorial\"",
' Example: search-provider -provider youtube "tutorial"',
"- internetarchive: Search archive.org items (advancedsearch syntax)",
" Example: search-provider -provider internetarchive \"title:(lincoln) AND mediatype:texts\"",
' Example: search-provider -provider internetarchive "title:(lincoln) AND mediatype:texts"',
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
@@ -65,9 +83,9 @@ class Search_Provider(Cmdlet):
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-file",
' search-provider -provider bandcamp "artist:grace" | @1 | download-file',
],
exec=self.run
exec=self.run,
)
self.register()
@@ -81,7 +99,9 @@ class Search_Provider(Cmdlet):
# Dynamic flag variants from cmdlet arg definitions.
flag_registry = self.build_flag_registry()
provider_flags = {f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})}
provider_flags = {
f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})
}
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
open_flags = {f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})}
@@ -106,14 +126,20 @@ class Search_Provider(Cmdlet):
try:
limit = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50", file=sys.stderr)
log(
f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50",
file=sys.stderr,
)
limit = 50
i += 2
elif low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr)
log(
f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
file=sys.stderr,
)
open_id = None
i += 2
elif not token.startswith("-"):
@@ -160,6 +186,7 @@ class Search_Provider(Cmdlet):
if library_root:
try:
from API.folder import API_folder_store
db = API_folder_store(library_root)
except Exception:
db = None
@@ -178,6 +205,7 @@ class Search_Provider(Cmdlet):
results_list = []
import result_table
importlib.reload(result_table)
from result_table import ResultTable
@@ -190,7 +218,9 @@ class Search_Provider(Cmdlet):
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
provider_label = (
provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
)
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
@@ -205,7 +235,9 @@ class Search_Provider(Cmdlet):
if provider_lower == "alldebrid":
if open_id is not None:
# Second-stage: show files for selected folder/magnet.
results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
results = provider.search(
query, limit=limit, filters={"view": "files", "magnet_id": open_id}
)
else:
# Default: show folders (magnets) so user can select @N.
results = provider.search(query, limit=limit, filters={"view": "folders"})
@@ -222,7 +254,11 @@ class Search_Provider(Cmdlet):
# Emit results for pipeline
for search_result in results:
item_dict = search_result.to_dict() if hasattr(search_result, "to_dict") else dict(search_result)
item_dict = (
search_result.to_dict()
if hasattr(search_result, "to_dict")
else dict(search_result)
)
# Ensure table field is set (should be by provider, but just in case)
if "table" not in item_dict:
@@ -233,13 +269,18 @@ class Search_Provider(Cmdlet):
# For AllDebrid folder rows, allow @N to open and show files.
try:
if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder":
if (
provider_lower == "alldebrid"
and getattr(search_result, "media_kind", "") == "folder"
):
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"])
table.set_row_selection_args(
row_index, ["-open", str(magnet_id), "-query", "*"]
)
except Exception:
pass
results_list.append(item_dict)
+49 -17
View File
@@ -1,4 +1,5 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
@@ -12,7 +13,16 @@ from SYS.logger import log, debug
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query = (
(
Cmdlet,
CmdletArg,
SharedArgs,
get_field,
should_show_help,
normalize_hash,
first_title_tag,
parse_hash_query,
) = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
@@ -37,9 +47,9 @@ class Search_Store(Cmdlet):
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
SharedArgs.STORE,
SharedArgs.QUERY,
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
@@ -84,14 +94,16 @@ class Search_Store(Cmdlet):
# Ensure we have title field
if "title" not in payload:
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
payload["title"] = (
payload.get("name") or payload.get("target") or payload.get("path") or "Result"
)
# Ensure we have ext field
if "ext" not in payload:
title = str(payload.get("title", ""))
path_obj = Path(title)
if path_obj.suffix:
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip("."))
else:
payload["ext"] = payload.get("ext", "")
@@ -134,11 +146,15 @@ class Search_Store(Cmdlet):
raw_title = None
try:
raw_title = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
raw_title = (
ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
)
except Exception:
raw_title = None
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-store", list(args_list))
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title(
"search-store", list(args_list)
)
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
@@ -184,7 +200,7 @@ class Search_Store(Cmdlet):
store_filter = match.group(1).strip() or None
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
query = re.sub(r"\s{2,}", " ", query)
query = query.strip().strip(',')
query = query.strip().strip(",")
if store_filter and not storage_backend:
storage_backend = store_filter
@@ -198,6 +214,7 @@ class Search_Store(Cmdlet):
from API.folder import API_folder_store
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
@@ -212,12 +229,13 @@ class Search_Store(Cmdlet):
"search-store",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
pipe=ctx.get_current_command_text(),
)
results_list = []
import result_table
import importlib
importlib.reload(result_table)
from result_table import ResultTable
@@ -233,6 +251,7 @@ class Search_Store(Cmdlet):
pass
from Store import Store
storage = Store(config=config or {})
from Store._base import Store as BaseStore
@@ -301,7 +320,11 @@ class Search_Store(Cmdlet):
else:
maybe_tags = tag_result
if isinstance(maybe_tags, list):
tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
tags_list = [
str(t).strip()
for t in maybe_tags
if isinstance(t, str) and str(t).strip()
]
except Exception:
tags_list = []
@@ -336,7 +359,9 @@ class Search_Store(Cmdlet):
if size_bytes is None:
size_bytes = meta_obj.get("size_bytes")
try:
size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
size_bytes_int: Optional[int] = (
int(size_bytes) if size_bytes is not None else None
)
except Exception:
size_bytes_int = None
@@ -362,7 +387,7 @@ class Search_Store(Cmdlet):
else:
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
log("No results found", file=sys.stderr)
@@ -373,15 +398,18 @@ class Search_Store(Cmdlet):
except Exception:
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
if backend_to_search:
searched_backends.append(backend_to_search)
target_backend = storage[backend_to_search]
if type(target_backend).search is BaseStore.search:
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
log(
f"Backend '{backend_to_search}' does not support searching",
file=sys.stderr,
)
db.update_worker_status(worker_id, "error")
return 1
debug(f"[search-store] Searching '{backend_to_search}'")
results = target_backend.search(query, limit=limit)
@@ -395,7 +423,9 @@ class Search_Store(Cmdlet):
debug(f"[search-store] Searching '{backend_name}'")
backend_results = backend.search(query, limit=limit - len(all_results))
debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
debug(
f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)"
)
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
@@ -406,6 +436,7 @@ class Search_Store(Cmdlet):
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
@@ -450,15 +481,16 @@ class Search_Store(Cmdlet):
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
try:
db.update_worker_status(worker_id, 'error')
db.update_worker_status(worker_id, "error")
except Exception:
pass
return 1
+67 -37
View File
@@ -1,4 +1,5 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
@@ -30,9 +31,19 @@ CMDLET = Cmdlet(
usage="trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
arg=[
CmdletArg("-path", description="Path to the file (optional if piped)."),
CmdletArg("-input", description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s')."),
CmdletArg("-outdir", description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos)."),
CmdletArg(
"-input",
description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL.",
),
CmdletArg(
"-range",
required=True,
description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s').",
),
CmdletArg(
"-outdir",
description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos).",
),
CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
],
detail=[
@@ -41,7 +52,7 @@ CMDLET = Cmdlet(
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
]
],
)
@@ -71,6 +82,7 @@ def _format_hms(total_seconds: float) -> str:
return "0s"
return "".join(parts)
def _is_url(value: str) -> bool:
try:
p = urlparse(str(value))
@@ -88,7 +100,7 @@ def _parse_time(time_str: str) -> float:
- SS(.sss)
- 1h3m53s (also 1h3m, 3m53s, 53s)
"""
raw = str(time_str or '').strip()
raw = str(time_str or "").strip()
if not raw:
raise ValueError("Empty time")
@@ -97,15 +109,15 @@ def _parse_time(time_str: str) -> float:
r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
raw,
)
if hms and (hms.group('h') or hms.group('m') or hms.group('s')):
hours = float(hms.group('h') or 0)
minutes = float(hms.group('m') or 0)
seconds = float(hms.group('s') or 0)
if hms and (hms.group("h") or hms.group("m") or hms.group("s")):
hours = float(hms.group("h") or 0)
minutes = float(hms.group("m") or 0)
seconds = float(hms.group("s") or 0)
total = hours * 3600 + minutes * 60 + seconds
return float(total)
# Colon-separated
parts = [p.strip() for p in raw.split(':')]
parts = [p.strip() for p in raw.split(":")]
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
if len(parts) == 2:
@@ -117,15 +129,15 @@ def _parse_time(time_str: str) -> float:
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
name = str(name or '').strip()
name = str(name or "").strip()
if not name:
return 'clip'
return "clip"
# Windows-forbidden characters: <>:"/\\|?* plus control chars
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', '_', name)
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', "_", name)
name = re.sub(r"\s+", " ", name).strip()
name = name.rstrip('.')
name = name.rstrip(".")
if not name:
return 'clip'
return "clip"
if len(name) > max_len:
name = name[:max_len].rstrip()
return name
@@ -140,7 +152,9 @@ def _extract_store_name(item: Any) -> Optional[str]:
return None
def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str) -> None:
def _persist_alt_relationship(
*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str
) -> None:
"""Persist directional alt -> king relationship in the given backend."""
try:
store = Store(config)
@@ -155,7 +169,11 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
# Folder-backed local DB
try:
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
from API.folder import API_folder_store
from pathlib import Path
@@ -174,12 +192,15 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
except Exception:
return
def _trim_media(input_source: str, output_path: Path, start_seconds: float, duration_seconds: float) -> bool:
def _trim_media(
input_source: str, output_path: Path, start_seconds: float, duration_seconds: float
) -> bool:
"""Trim media using ffmpeg.
input_source may be a local path or a URL.
"""
ffmpeg_path = shutil.which('ffmpeg')
ffmpeg_path = shutil.which("ffmpeg")
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
@@ -190,12 +211,18 @@ def _trim_media(input_source: str, output_path: Path, start_seconds: float, dura
return False
cmd = [
ffmpeg_path, '-y',
'-ss', str(float(start_seconds)),
'-i', str(input_source),
'-t', str(float(duration_seconds)),
'-c', 'copy',
'-map_metadata', '0',
ffmpeg_path,
"-y",
"-ss",
str(float(start_seconds)),
"-i",
str(input_source),
"-t",
str(float(duration_seconds)),
"-c",
"copy",
"-map_metadata",
"0",
str(output_path),
]
@@ -211,17 +238,18 @@ def _trim_media(input_source: str, output_path: Path, start_seconds: float, dura
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
range_arg = parsed.get("range")
if not range_arg or '-' not in range_arg:
if not range_arg or "-" not in range_arg:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = [s.strip() for s in range_arg.split('-', 1)]
start_str, end_str = [s.strip() for s in range_arg.split("-", 1)]
if not start_str or not end_str:
log("Error: -range must be start-end", file=sys.stderr)
return 1
@@ -290,11 +318,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
output_dir = Path(str(outdir_arg)).expanduser()
elif store_name:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
elif path_obj is not None:
output_dir = path_obj.parent
else:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
try:
@@ -303,7 +333,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
# Determine output filename
output_ext = ''
output_ext = ""
if path_obj is not None:
output_ext = path_obj.suffix
base_name = path_obj.stem
@@ -313,21 +343,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if title:
base_name = _sanitize_filename(str(title))
else:
base_name = time.strftime('%Y%m%d-%H%M%S')
base_name = time.strftime("%Y%m%d-%H%M%S")
if base_name.lower().startswith('clip_'):
if base_name.lower().startswith("clip_"):
base_name = base_name[5:] or base_name
try:
p = urlparse(str(media_source))
last = (p.path or '').split('/')[-1]
if last and '.' in last:
output_ext = '.' + last.split('.')[-1]
last = (p.path or "").split("/")[-1]
if last and "." in last:
output_ext = "." + last.split(".")[-1]
except Exception:
pass
if not output_ext or len(output_ext) > 8:
output_ext = '.mkv'
output_ext = ".mkv"
new_filename = f"clip_{base_name}{output_ext}"
output_path = output_dir / new_filename
@@ -343,7 +373,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
break
# Trim
source_label = (path_obj.name if path_obj is not None else str(media_source))
source_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
log(f"Created clip: {output_path}", file=sys.stderr)
@@ -457,7 +487,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Provide both keys so downstream (e.g. add-file) can persist relationships.
"king": [source_hash] if source_hash else [],
"alt": [clip_hash] if (source_hash and clip_hash) else [],
}
},
}
# Emit result
@@ -475,7 +505,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
failed_label = (path_obj.name if path_obj is not None else str(media_source))
failed_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Failed to trim {failed_label}", file=sys.stderr)
return 0 if success_count > 0 else 1
+19 -4
View File
@@ -20,6 +20,7 @@ def _should_hide_db_args(config: Optional[Dict[str, Any]]) -> bool:
except Exception:
return False
try:
from cmdlet import REGISTRY
except Exception:
@@ -93,7 +94,9 @@ def _normalize_arg(arg: Any) -> Dict[str, Any]:
}
def get_cmdlet_metadata(cmd_name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
def get_cmdlet_metadata(
cmd_name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[Dict[str, Any]]:
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
ensure_registry_loaded()
normalized = cmd_name.replace("-", "_")
@@ -184,12 +187,22 @@ def list_cmdlet_metadata(config: Optional[Dict[str, Any]] = None) -> Dict[str, D
else:
entries.setdefault(
canonical,
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
{
"name": canonical,
"aliases": [],
"usage": "",
"summary": "",
"details": [],
"args": [],
"raw": None,
},
)
return entries
def list_cmdlet_names(include_aliases: bool = True, config: Optional[Dict[str, Any]] = None) -> List[str]:
def list_cmdlet_names(
include_aliases: bool = True, config: Optional[Dict[str, Any]] = None
) -> List[str]:
"""Return sorted cmdlet names (optionally including aliases)."""
ensure_registry_loaded()
entries = list_cmdlet_metadata(config=config)
@@ -223,7 +236,9 @@ def get_cmdlet_arg_flags(cmd_name: str, config: Optional[Dict[str, Any]] = None)
return flags
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str, config: Optional[Dict[str, Any]] = None) -> List[str]:
def get_cmdlet_arg_choices(
cmd_name: str, arg_name: str, config: Optional[Dict[str, Any]] = None
) -> List[str]:
"""Return declared choices for a cmdlet argument."""
meta = get_cmdlet_metadata(cmd_name, config=config)
if not meta:
+4 -1
View File
@@ -32,7 +32,9 @@ def register_native_commands(registry: Dict[str, CmdletFn]) -> None:
"""Import native command modules and register their CMDLET exec functions."""
base_dir = os.path.dirname(__file__)
for filename in os.listdir(base_dir):
if not (filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"):
if not (
filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"
):
continue
mod_name = filename[:-3]
@@ -43,5 +45,6 @@ def register_native_commands(registry: Dict[str, CmdletFn]) -> None:
_register_cmdlet_object(cmdlet_obj, registry)
except Exception as exc:
import sys
print(f"Error importing native command '{mod_name}': {exc}", file=sys.stderr)
continue
+10 -4
View File
@@ -7,26 +7,31 @@ from SYS.logger import log
from result_table import ResultTable
import pipeline as ctx
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnat", "adjective.json")
ADJECTIVE_FILE = os.path.join(
os.path.dirname(os.path.dirname(__file__)), "cmdnat", "adjective.json"
)
def _load_adjectives() -> Dict[str, List[str]]:
    """Load the adjective map stored in ``ADJECTIVE_FILE``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        cannot be read or parsed, or does not contain a JSON object at its
        top level. Every failure except a missing file is logged to stderr.
    """
    try:
        # Open directly rather than checking os.path.exists() first: the file
        # could disappear between the check and the open.
        with open(ADJECTIVE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file yields an empty map without logging, as before.
        return {}
    except Exception as e:
        log(f"Error loading adjectives: {e}", file=sys.stderr)
        return {}

    # The return annotation promises a dict; reject valid JSON of another
    # shape (list, string, number, ...) instead of handing it to callers.
    if not isinstance(data, dict):
        log(
            f"Error loading adjectives: expected a JSON object, got {type(data).__name__}",
            file=sys.stderr,
        )
        return {}
    return data
def _save_adjectives(data: Dict[str, List[str]]) -> bool:
    """Write ``data`` to ``ADJECTIVE_FILE`` as JSON indented by two spaces.

    Returns:
        True when the file was written; False when serialization or the
        write failed, in which case the error is logged to stderr.
    """
    try:
        # Serialize before opening: mode "w" truncates the file immediately,
        # so a serialization error raised after the open would leave the
        # existing file empty or half-written.
        payload = json.dumps(data, indent=2)
        with open(ADJECTIVE_FILE, "w", encoding="utf-8") as f:
            f.write(payload)
        return True
    except Exception as e:
        log(f"Error saving adjectives: {e}", file=sys.stderr)
        return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
data = _load_adjectives()
@@ -137,6 +142,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
CMDLET = Cmdlet(
name=".adjective",
alias=["adj"],
@@ -148,5 +154,5 @@ CMDLET = Cmdlet(
CmdletArg(name="add", type="flag", description="Add tag"),
CmdletArg(name="delete", type="flag", description="Delete tag"),
],
exec=_run
exec=_run,
)
+26 -21
View File
@@ -9,38 +9,38 @@ CMDLET = Cmdlet(
usage=".config [key] [value]",
arg=[
CmdletArg(
name="key",
description="Configuration key to update (dot-separated)",
required=False
name="key", description="Configuration key to update (dot-separated)", required=False
),
CmdletArg(
name="value",
description="New value for the configuration key",
required=False
)
]
CmdletArg(name="value", description="New value for the configuration key", required=False),
],
)
def flatten_config(
    config: Dict[str, Any], parent_key: str = "", sep: str = "."
) -> List[Dict[str, Any]]:
    """Flatten a nested config dict into one row per leaf value.

    Args:
        config: Mapping to flatten; nested dicts are walked recursively.
        parent_key: Key path of ``config`` within the outer mapping, or an
            empty string at the top level.
        sep: Separator placed between path components.

    Returns:
        A list of row dicts with the keys ``"Key"`` (the joined path),
        ``"Value"`` (``str()`` of the leaf), ``"Type"`` (the leaf's type
        name) and ``"_selection_args"`` (a one-element list holding the
        path). Keys whose text starts with ``"_"`` are skipped together
        with everything beneath them. Empty nested dicts produce no rows.
    """
    items: List[Dict[str, Any]] = []
    for k, v in config.items():
        # Normalize to text so non-string keys neither crash startswith()
        # nor end up as a non-string "Key" value in the emitted row.
        key_text = str(k)
        if key_text.startswith("_"):  # Skip internal keys
            continue

        new_key = f"{parent_key}{sep}{key_text}" if parent_key else key_text

        if isinstance(v, dict):
            items.extend(flatten_config(v, new_key, sep=sep))
        else:
            items.append(
                {
                    "Key": new_key,
                    "Value": str(v),
                    "Type": type(v).__name__,
                    "_selection_args": [new_key],
                }
            )
    return items
def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
keys = key.split('.')
keys = key.split(".")
d = config
# Navigate to the parent dict
@@ -55,9 +55,9 @@ def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
if last_key in d:
current_val = d[last_key]
if isinstance(current_val, bool):
if value.lower() in ('true', 'yes', '1', 'on'):
if value.lower() in ("true", "yes", "1", "on"):
d[last_key] = True
elif value.lower() in ('false', 'no', '0', 'off'):
elif value.lower() in ("false", "no", "0", "off"):
d[last_key] = False
else:
# Fallback to boolean conversion of string (usually True for non-empty)
@@ -80,8 +80,8 @@ def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
d[last_key] = value
else:
# New key, try to infer type
if value.lower() in ('true', 'false'):
d[last_key] = (value.lower() == 'true')
if value.lower() in ("true", "false"):
d[last_key] = value.lower() == "true"
elif value.isdigit():
d[last_key] = int(value)
else:
@@ -89,6 +89,7 @@ def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
return True
def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
# Reload config to ensure we have the latest on disk
# We don't use the passed 'config' because we want to edit the file
@@ -106,10 +107,11 @@ def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
# List mode
items = flatten_config(current_config)
# Sort by key
items.sort(key=lambda x: x['Key'])
items.sort(key=lambda x: x["Key"])
# Emit items for ResultTable
import pipeline as ctx
for item in items:
ctx.emit(item)
return 0
@@ -124,7 +126,9 @@ def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
value = " ".join(args[1:])
# Remove quotes if present
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
value = value[1:-1]
try:
@@ -136,4 +140,5 @@ def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
print(f"Error updating config: {e}")
return 1
CMDLET.exec = _run
+3 -1
View File
@@ -38,7 +38,9 @@ def _find_cmd_metadata(name: str, metadata: Dict[str, Dict[str, Any]]) -> Option
return None
def _render_list(metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str], args: Sequence[str]) -> None:
def _render_list(
metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str], args: Sequence[str]
) -> None:
table = ResultTable("Help")
table.set_source_command(".help", list(args))
+67 -15
View File
@@ -169,6 +169,7 @@ def _extract_file_path(item: Any) -> Optional[str]:
Returns a filesystem path string only if it exists.
"""
def _maybe_local_path(value: Any) -> Optional[str]:
if value is None:
return None
@@ -225,7 +226,11 @@ def _extract_url(item: Any) -> Optional[str]:
if isinstance(item, dict):
for key in ("url", "source_url", "path", "target"):
raw = item.get(key)
if isinstance(raw, str) and raw.strip() and raw.strip().startswith(("http://", "https://")):
if (
isinstance(raw, str)
and raw.strip()
and raw.strip().startswith(("http://", "https://"))
):
return raw.strip()
except Exception:
pass
@@ -264,7 +269,9 @@ def _extract_hash_from_hydrus_file_url(url: str) -> Optional[str]:
return None
def _maybe_download_hydrus_file(item: Any, config: Dict[str, Any], output_dir: Path) -> Optional[str]:
def _maybe_download_hydrus_file(
item: Any, config: Dict[str, Any], output_dir: Path
) -> Optional[str]:
"""If the item looks like a Hydrus file (hash + Hydrus URL), download it using Hydrus access key headers.
This avoids 401 from Hydrus when the URL is /get_files/file?hash=... without headers.
@@ -307,18 +314,24 @@ def _maybe_download_hydrus_file(item: Any, config: Dict[str, Any], output_dir: P
is_hydrus_url = False
if url:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
is_hydrus_url = (parsed.path or "").endswith(
"/get_files/file"
) and _extract_hash_from_hydrus_file_url(url) == file_hash
hydrus_instances: set[str] = set()
try:
store_cfg = (config or {}).get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
hydrus_instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
hydrus_instances = {
str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()
}
except Exception:
hydrus_instances = set()
store_hint = store_name.lower() in {"hydrus", "hydrusnetwork"} or (store_name.lower() in hydrus_instances)
store_hint = store_name.lower() in {"hydrus", "hydrusnetwork"} or (
store_name.lower() in hydrus_instances
)
if not (is_hydrus_url or store_hint):
return None
@@ -402,7 +415,11 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
base_tmp = None
if isinstance(config, dict):
base_tmp = config.get("temp")
output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina")
output_dir = (
Path(str(base_tmp)).expanduser()
if base_tmp
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
output_dir = output_dir / "matrix" / "hydrus"
hydrus_path = _maybe_download_hydrus_file(item, config, output_dir)
if hydrus_path:
@@ -423,11 +440,20 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
base_tmp = None
if isinstance(config, dict):
base_tmp = config.get("temp")
output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina")
output_dir = (
Path(str(base_tmp)).expanduser()
if base_tmp
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
output_dir = output_dir / "matrix"
output_dir.mkdir(parents=True, exist_ok=True)
result = _download_direct_file(url, output_dir, quiet=True)
if result and hasattr(result, "path") and isinstance(result.path, Path) and result.path.exists():
if (
result
and hasattr(result, "path")
and isinstance(result.path, Path)
and result.path.exists()
):
return str(result.path)
except Exception as exc:
debug(f"[matrix] Failed to download URL for upload: {exc}")
@@ -467,6 +493,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
from Provider.matrix import Matrix
try:
provider = Matrix(config)
except Exception as exc:
@@ -490,7 +517,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
file_path = _resolve_upload_path(item, config)
if not file_path:
any_failed = True
log("Matrix upload requires a local file (path) or a direct URL on the selected item", file=sys.stderr)
log(
"Matrix upload requires a local file (path) or a direct URL on the selected item",
file=sys.stderr,
)
continue
media_path = Path(file_path)
@@ -561,6 +591,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
from Provider.matrix import Matrix
try:
provider = Matrix(config)
except Exception as exc:
@@ -581,7 +612,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Diagnostics if a configured filter yields no rows (provider filtered before name lookups for speed).
if not rooms and not _has_flag(args, "-all"):
configured_ids_dbg = [str(v).strip() for v in _parse_config_room_filter_ids(config) if str(v).strip()]
configured_ids_dbg = [
str(v).strip() for v in _parse_config_room_filter_ids(config) if str(v).strip()
]
if configured_ids_dbg:
try:
joined_ids = provider.list_joined_room_ids()
@@ -592,7 +625,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not rooms:
if _parse_config_room_filter_ids(config) and not _has_flag(args, "-all"):
log("No joined rooms matched the configured Matrix room filter (use: .matrix -all)", file=sys.stderr)
log(
"No joined rooms matched the configured Matrix room filter (use: .matrix -all)",
file=sys.stderr,
)
else:
log("No joined rooms found.", file=sys.stderr)
return 0
@@ -630,15 +666,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
return 0
CMDLET = Cmdlet(
name=".matrix",
alias=["matrix", "rooms"],
summary="Send selected items to a Matrix room",
usage="@N | .matrix",
arg=[
CmdletArg(name="send", type="bool", description="(internal) Send to selected room(s)", required=False),
CmdletArg(name="all", type="bool", description="Ignore config room filter and show all joined rooms", required=False),
CmdletArg(name="text", type="string", description="Send a follow-up text message after each upload (caption-like)", required=False),
CmdletArg(
name="send",
type="bool",
description="(internal) Send to selected room(s)",
required=False,
),
CmdletArg(
name="all",
type="bool",
description="Ignore config room filter and show all joined rooms",
required=False,
),
CmdletArg(
name="text",
type="string",
description="Send a follow-up text message after each upload (caption-like)",
required=False,
),
],
exec=_run
exec=_run,
)
+1 -5
View File
@@ -112,11 +112,7 @@ def _get_active_table(piped_result: Any) -> Optional[Any]:
if piped_result.__class__.__name__ == "ResultTable":
return piped_result
return (
ctx.get_display_table()
or ctx.get_current_stage_table()
or ctx.get_last_result_table()
)
return ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
def _run(piped_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+172 -83
View File
@@ -18,7 +18,6 @@ from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url
_ALLDEBRID_UNLOCK_CACHE: Dict[str, str] = {}
@@ -69,10 +68,14 @@ def _try_enable_mpv_file_logging(mpv_log_path: str, *, attempts: int = 3) -> boo
# Try to set log-file and verbose level.
r1 = _send_ipc_command({"command": ["set_property", "options/log-file", mpv_log_path]})
r2 = _send_ipc_command({"command": ["set_property", "options/msg-level", "all=v"]})
ok = bool((r1 and r1.get("error") == "success") or (r2 and r2.get("error") == "success"))
ok = bool(
(r1 and r1.get("error") == "success") or (r2 and r2.get("error") == "success")
)
# Emit a predictable line so the file isn't empty if logging is active.
_send_ipc_command({"command": ["print-text", f"medeia: log enabled -> {mpv_log_path}"]}, silent=True)
_send_ipc_command(
{"command": ["print-text", f"medeia: log enabled -> {mpv_log_path}"]}, silent=True
)
except Exception:
ok = False
@@ -86,6 +89,7 @@ def _try_enable_mpv_file_logging(mpv_log_path: str, *, attempts: int = 3) -> boo
try:
import time
time.sleep(0.15)
except Exception:
break
@@ -168,6 +172,7 @@ def _ensure_lyric_overlay(mpv: MPV) -> None:
except Exception:
pass
def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]:
"""Send a command to the MPV IPC pipe and return the response."""
try:
@@ -189,6 +194,7 @@ def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]:
return resp.get("data", [])
return []
def _extract_title_from_item(item: Dict[str, Any]) -> str:
"""Extract a clean title from an MPV playlist item, handling memory:// M3U hacks."""
title = item.get("title")
@@ -210,7 +216,7 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:
lines = filename.splitlines()
for line in lines:
line = line.strip()
if line and not line.startswith('#') and not line.startswith('memory://'):
if line and not line.startswith("#") and not line.startswith("memory://"):
# Found the URL, use it as title
return line
except Exception:
@@ -225,7 +231,7 @@ def _extract_target_from_memory_uri(text: str) -> Optional[str]:
return None
for line in text.splitlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('memory://'):
if not line or line.startswith("#") or line.startswith("memory://"):
continue
return line
return None
@@ -324,11 +330,13 @@ def _normalize_playlist_path(text: Optional[str]) -> Optional[str]:
pass
# Normalize slashes for Windows paths and lowercase for comparison
real = real.replace('\\', '/')
real = real.replace("\\", "/")
return real.lower()
def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional[Any] = None) -> str:
def _infer_store_from_playlist_item(
item: Dict[str, Any], file_storage: Optional[Any] = None
) -> str:
"""Infer a friendly store label from an MPV playlist entry.
Args:
@@ -423,7 +431,7 @@ def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional
return hydrus_instance
return "hydrus"
parts = host_stripped.split('.')
parts = host_stripped.split(".")
if len(parts) >= 2:
return parts[-2] or host_stripped
return host_stripped
@@ -440,7 +448,9 @@ def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]:
return f"Hydrus-Client-API-Access-Key: {key}"
def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]:
def _build_ytdl_options(
config: Optional[Dict[str, Any]], hydrus_header: Optional[str]
) -> Optional[str]:
"""Compose ytdl-raw-options string including cookies and optional Hydrus header."""
opts: List[str] = []
cookies_path = None
@@ -454,7 +464,7 @@ def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optiona
cookies_path = None
if cookies_path:
opts.append(f"cookies={cookies_path.replace('\\', '/')}" )
opts.append(f"cookies={cookies_path.replace('\\', '/')}")
else:
opts.append("cookies-from-browser=chrome")
if hydrus_header:
@@ -484,9 +494,11 @@ def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool:
return True
return False
def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
"""Ensure yt-dlp options are set correctly for this session."""
from pathlib import Path
cookies_path = None
try:
from tool.ytdlp import YtDlpTool
@@ -498,7 +510,7 @@ def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
cookies_path = None
if cookies_path:
# Check if file exists and has content (use forward slashes for path checking)
check_path = cookies_path.replace('\\', '/')
check_path = cookies_path.replace("\\", "/")
file_obj = Path(cookies_path)
if file_obj.exists():
file_size = file_obj.stat().st_size
@@ -508,6 +520,7 @@ def _ensure_ytdl_cookies(config: Optional[Dict[str, Any]] = None) -> None:
else:
debug("No cookies file configured")
def _monitor_mpv_logs(duration: float = 3.0) -> None:
"""Monitor MPV logs for a short duration to capture errors."""
try:
@@ -526,6 +539,7 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None:
return
import time
start_time = time.time()
# Unix sockets already have timeouts set; read until duration expires
@@ -585,7 +599,11 @@ def _tail_text_file(path: str, *, max_lines: int = 120, max_bytes: int = 65536)
return lines
except Exception:
return []
def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]) -> Optional[tuple[str, Optional[str]]]:
def _get_playable_path(
item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]
) -> Optional[tuple[str, Optional[str]]]:
"""Extract a playable path/URL from an item, handling different store types.
Args:
@@ -614,13 +632,25 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
title = item.get("title") or item.get("file_title")
store = item.get("store")
file_hash = item.get("hash")
elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"):
elif (
hasattr(item, "path")
or hasattr(item, "url")
or hasattr(item, "source_url")
or hasattr(item, "store")
or hasattr(item, "hash")
):
# Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes
path = getattr(item, "path", None)
if not path:
path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None)
path = (
getattr(item, "url", None)
or getattr(item, "source_url", None)
or getattr(item, "target", None)
)
if not path:
known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get("url")
known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get(
"url"
)
if known and isinstance(known, list):
path = known[0]
title = getattr(item, "title", None) or getattr(item, "file_title", None)
@@ -663,7 +693,11 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[
backend_class = type(backend).__name__
# Folder stores: resolve to an on-disk file path.
if hasattr(backend, "get_file") and callable(getattr(backend, "get_file")) and backend_class == "Folder":
if (
hasattr(backend, "get_file")
and callable(getattr(backend, "get_file"))
and backend_class == "Folder"
):
try:
resolved = backend.get_file(file_hash)
if isinstance(resolved, Path):
@@ -734,6 +768,7 @@ def _queue_items(
file_storage = None
try:
from Store import Store
file_storage = Store(config or {})
except Exception as e:
debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr)
@@ -757,7 +792,9 @@ def _queue_items(
# Remove duplicates from playlist starting from the end to keep indices valid
for idx in reversed(dup_indexes):
try:
_send_ipc_command({"command": ["playlist-remove", idx], "request_id": 106}, silent=True)
_send_ipc_command(
{"command": ["playlist-remove", idx], "request_id": 106}, silent=True
)
except Exception:
pass
@@ -812,7 +849,9 @@ def _queue_items(
if base_url:
effective_hydrus_url = str(base_url).rstrip("/")
if key:
effective_hydrus_header = f"Hydrus-Client-API-Access-Key: {str(key).strip()}"
effective_hydrus_header = (
f"Hydrus-Client-API-Access-Key: {str(key).strip()}"
)
effective_ytdl_opts = _build_ytdl_options(config, effective_hydrus_header)
except Exception:
pass
@@ -820,7 +859,9 @@ def _queue_items(
if target:
# If we just have a hydrus hash, build a direct file URL for MPV
if re.fullmatch(r"[0-9a-f]{64}", str(target).strip().lower()) and effective_hydrus_url:
target = f"{effective_hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}"
target = (
f"{effective_hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}"
)
norm_key = _normalize_playlist_path(target) or str(target).strip().lower()
if norm_key in existing_targets or norm_key in new_targets:
@@ -833,13 +874,17 @@ def _queue_items(
# show the raw URL as the playlist title.
if title:
# Sanitize title for M3U (remove newlines)
safe_title = title.replace('\n', ' ').replace('\r', '')
safe_title = title.replace("\n", " ").replace("\r", "")
# Carry the store name for hash URLs so MPV.lyric can resolve the backend.
# This is especially important for local file-server URLs like /get_files/file?hash=...
target_for_m3u = target
try:
if item_store_name and isinstance(target_for_m3u, str) and target_for_m3u.startswith("http"):
if (
item_store_name
and isinstance(target_for_m3u, str)
and target_for_m3u.startswith("http")
):
if "get_files/file" in target_for_m3u and "store=" not in target_for_m3u:
sep = "&" if "?" in target_for_m3u else "?"
target_for_m3u = f"{target_for_m3u}{sep}store={item_store_name}"
@@ -858,10 +903,16 @@ def _queue_items(
# If this is a Hydrus path, set header property and yt-dlp headers before loading.
# Use the real target (not the memory:// wrapper) for detection.
if effective_hydrus_header and _is_hydrus_path(str(target), effective_hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", effective_hydrus_header], "request_id": 199}
header_cmd = {
"command": ["set_property", "http-header-fields", effective_hydrus_header],
"request_id": 199,
}
_send_ipc_command(header_cmd, silent=True)
if effective_ytdl_opts:
ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", effective_ytdl_opts], "request_id": 197}
ytdl_cmd = {
"command": ["set_property", "ytdl-raw-options", effective_ytdl_opts],
"request_id": 197,
}
_send_ipc_command(ytdl_cmd, silent=True)
cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200}
@@ -876,7 +927,9 @@ def _queue_items(
if resp is None:
# MPV not running (or died)
# Start MPV with remaining items
debug(f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}")
debug(
f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}"
)
_start_mpv(items[i:], config=config, start_opts=start_opts)
return True
elif resp.get("error") == "success":
@@ -884,10 +937,11 @@ def _queue_items(
# would change the MPV window title even if the item isn't currently playing.
debug(f"Queued: {title or target}")
else:
error_msg = str(resp.get('error'))
error_msg = str(resp.get("error"))
debug(f"Failed to queue item: {error_msg}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Manage and play items in the MPV playlist via IPC."""
@@ -912,7 +966,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log_dir = _repo_log_dir()
mpv_log_path = str((log_dir / "medeia-mpv.log").resolve())
except Exception:
mpv_log_path = str((Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".") / "medeia-mpv.log").resolve())
mpv_log_path = str(
(
Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".")
/ "medeia-mpv.log"
).resolve()
)
# Ensure file exists early so we can tail it even if mpv writes later.
try:
Path(mpv_log_path).parent.mkdir(parents=True, exist_ok=True)
@@ -1019,11 +1078,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Emit the current item to pipeline
result_obj = {
'path': filename,
'title': title,
'cmdlet_name': '.pipe',
'source': 'pipe',
'__pipe_index': items.index(current_item),
"path": filename,
"title": title,
"cmdlet_name": ".pipe",
"source": "pipe",
"__pipe_index": items.index(current_item),
}
ctx.emit(result_obj)
@@ -1040,6 +1099,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started, wait a moment for it to be ready, then play first item
import time
time.sleep(0.5)
index_arg = "1" # 1-based index for first item
play_mode = True
@@ -1061,6 +1121,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started; give it a moment, then play first item.
import time
time.sleep(0.5)
index_arg = "1"
else:
@@ -1156,7 +1217,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Queue items (replacing current playlist)
if items:
_queue_items(items, clear_first=True, config=config, start_opts=start_opts)
_queue_items(
items, clear_first=True, config=config, start_opts=start_opts
)
else:
# Empty playlist, just clear
_send_ipc_command({"command": ["playlist-clear"]}, silent=True)
@@ -1180,22 +1243,22 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
table = ResultTable("Saved Playlists")
for i, pl in enumerate(playlists):
item_count = len(pl.get('items', []))
item_count = len(pl.get("items", []))
row = table.add_row()
# row.add_column("ID", str(pl['id'])) # Hidden as per user request
row.add_column("Name", pl['name'])
row.add_column("Name", pl["name"])
row.add_column("Items", str(item_count))
row.add_column("Updated", pl['updated_at'])
row.add_column("Updated", pl["updated_at"])
# Set the playlist items as the result object for this row
# When user selects @N, they get the list of items
# We also set the source command to .pipe -load <ID> so it loads it
table.set_row_selection_args(i, ["-load", str(pl['id'])])
table.set_row_selection_args(i, ["-load", str(pl["id"])])
table.set_source_command(".pipe")
# Register results
ctx.set_last_result_table_overlay(table, [p['items'] for p in playlists])
ctx.set_last_result_table_overlay(table, [p["items"] for p in playlists])
ctx.set_current_stage_table(table)
# Do not print directly here.
@@ -1243,7 +1306,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
playlist_before = _get_playlist(silent=True)
idle_before = None
try:
idle_resp = _send_ipc_command({"command": ["get_property", "idle-active"], "request_id": 111}, silent=True)
idle_resp = _send_ipc_command(
{"command": ["get_property", "idle-active"], "request_id": 111}, silent=True
)
if idle_resp and idle_resp.get("error") == "success":
idle_before = bool(idle_resp.get("data"))
except Exception:
@@ -1262,7 +1327,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Debug: inspect incoming result and attributes
try:
debug(f"pipe._run: received result type={type(result)} repr={repr(result)[:200]}")
debug(f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}")
debug(
f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}"
)
except Exception:
pass
@@ -1294,8 +1361,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_autoplay and after_len > 0:
idx_to_play = min(max(0, before_len), after_len - 1)
play_resp = _send_ipc_command({"command": ["playlist-play-index", idx_to_play], "request_id": 112}, silent=True)
_send_ipc_command({"command": ["set_property", "pause", False], "request_id": 113}, silent=True)
play_resp = _send_ipc_command(
{"command": ["playlist-play-index", idx_to_play], "request_id": 112},
silent=True,
)
_send_ipc_command(
{"command": ["set_property", "pause", False], "request_id": 113},
silent=True,
)
if play_resp and play_resp.get("error") == "success":
debug("Auto-playing piped item")
@@ -1315,6 +1388,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if mpv_started:
# MPV was just started, retry getting playlist after a brief delay
import time
time.sleep(0.3)
items = _get_playlist(silent=True)
@@ -1324,10 +1398,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
else:
# Do not auto-launch MPV when no action/inputs were provided; avoid surprise startups
no_inputs = not any([
result, url_arg, index_arg, clear_mode, play_mode,
pause_mode, save_mode, load_mode, current_mode, list_mode
])
no_inputs = not any(
[
result,
url_arg,
index_arg,
clear_mode,
play_mode,
pause_mode,
save_mode,
load_mode,
current_mode,
list_mode,
]
)
if no_inputs:
# User invoked `.pipe` with no args: treat this as an intent to open MPV.
@@ -1337,6 +1421,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Re-check playlist after startup; if IPC still isn't ready, just exit cleanly.
try:
import time
time.sleep(0.3)
except Exception:
pass
@@ -1386,18 +1471,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
list_mode = True
index_arg = None
else:
debug(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
debug(
f"Failed to remove item: {resp.get('error') if resp else 'No response'}"
)
return 1
else:
# Play item
if hydrus_header and _is_hydrus_path(filename, hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198}
header_cmd = {
"command": ["set_property", "http-header-fields", hydrus_header],
"request_id": 198,
}
_send_ipc_command(header_cmd, silent=True)
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
# Ensure playback starts (unpause)
unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103}
unpause_cmd = {
"command": ["set_property", "pause", False],
"request_id": 103,
}
_send_ipc_command(unpause_cmd)
debug(f"Playing: {title}")
@@ -1410,7 +1503,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
list_mode = True
index_arg = None
else:
debug(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
debug(
f"Failed to play item: {resp.get('error') if resp else 'No response'}"
)
return 1
except ValueError:
debug(f"Invalid index: {index_arg}")
@@ -1425,6 +1520,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if file_storage is None:
try:
from Store import Store
file_storage = Store(config)
except Exception as e:
debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr)
@@ -1468,7 +1564,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Try to extract hash from filename (e.g., C:\path\1e8c46...a1b2.mp4)
path_obj = Path(real_path)
stem = path_obj.stem # filename without extension
if len(stem) == 64 and all(c in '0123456789abcdef' for c in stem.lower()):
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
file_hash = stem.lower()
# Find which folder store has this file
if file_storage:
@@ -1493,7 +1589,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
hash=file_hash or "unknown",
store=store_name or "unknown",
title=title,
path=real_path
path=real_path,
)
pipe_objects.append(pipe_obj)
@@ -1540,6 +1636,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
break
try:
import time
time.sleep(0.25)
except Exception:
break
@@ -1550,8 +1647,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
print(ln)
else:
print("MPV log (tail): <empty>")
print("Note: On some Windows builds, mpv cannot start writing to --log-file after launch.")
print("If you need full [main2] logs, restart mpv so it starts with --log-file.")
print(
"Note: On some Windows builds, mpv cannot start writing to --log-file after launch."
)
print(
"If you need full [main2] logs, restart mpv so it starts with --log-file."
)
# Also print the helper log tail (this captures Python helper output that won't
# necessarily show up in MPV's own log-file).
@@ -1597,7 +1698,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_opts: Optional[Dict[str, Any]] = None) -> None:
def _start_mpv(
items: List[Any],
config: Optional[Dict[str, Any]] = None,
start_opts: Optional[Dict[str, Any]] = None,
) -> None:
"""Start MPV with a list of items."""
import time as _time_module
@@ -1624,7 +1730,7 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
try:
extra_args: List[str] = [
'--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]',
"--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]",
]
# Optional: borderless window (useful for uosc-like overlay UI without fullscreen).
@@ -1662,6 +1768,7 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_
# Auto-play the first item
import time
time.sleep(0.3) # Give MPV a moment to process the queued items
# Play the first item (index 0) and unpause
@@ -1690,34 +1797,17 @@ CMDLET = Cmdlet(
name="index",
type="string", # Changed to string to allow URL detection
description="Index of item to play/clear, or URL to queue",
required=False
),
CmdletArg(
name="url",
type="string",
description="URL to queue",
required=False
required=False,
),
CmdletArg(name="url", type="string", description="URL to queue", required=False),
CmdletArg(
name="clear",
type="flag",
description="Remove the selected item, or clear entire playlist if no index provided"
),
CmdletArg(
name="list",
type="flag",
description="List items (default)"
),
CmdletArg(
name="play",
type="flag",
description="Resume playback"
),
CmdletArg(
name="pause",
type="flag",
description="Pause playback"
description="Remove the selected item, or clear entire playlist if no index provided",
),
CmdletArg(name="list", type="flag", description="List items (default)"),
CmdletArg(name="play", type="flag", description="Resume playback"),
CmdletArg(name="pause", type="flag", description="Pause playback"),
CmdletArg(
name="save",
type="flag",
@@ -1733,19 +1823,18 @@ CMDLET = Cmdlet(
CmdletArg(
name="current",
type="flag",
description="Emit the currently playing item to pipeline for further processing"
description="Emit the currently playing item to pipeline for further processing",
),
CmdletArg(
name="log",
type="flag",
description="Enable pipeable debug output and write an mpv log file"
description="Enable pipeable debug output and write an mpv log file",
),
CmdletArg(
name="borderless",
type="flag",
description="Start mpv with no window border (uosc-like overlay feel without fullscreen)"
description="Start mpv with no window border (uosc-like overlay feel without fullscreen)",
),
],
exec=_run
exec=_run,
)
+29 -7
View File
@@ -193,7 +193,9 @@ def _run(_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
try:
chat_usernames = list(dict.fromkeys([str(u).strip() for u in chat_usernames if str(u).strip()]))
chat_usernames = list(
dict.fromkeys([str(u).strip() for u in chat_usernames if str(u).strip()])
)
except Exception:
pass
@@ -213,7 +215,10 @@ def _run(_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
p = _extract_file_path(item)
if not p:
any_failed = True
log("Telegram send requires local file path(s) on the piped item(s)", file=sys.stderr)
log(
"Telegram send requires local file path(s) on the piped item(s)",
file=sys.stderr,
)
continue
title = _extract_title(item)
file_jobs.append({"path": p, "title": title})
@@ -236,7 +241,9 @@ def _run(_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
try:
provider.send_files_to_chats(chat_ids=chat_ids, usernames=chat_usernames, files=file_jobs)
provider.send_files_to_chats(
chat_ids=chat_ids, usernames=chat_usernames, files=file_jobs
)
except Exception as exc:
log(f"Telegram send failed: {exc}", file=sys.stderr)
any_failed = True
@@ -268,12 +275,17 @@ def _run(_result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Only show dialogs you can typically post to.
try:
rows = [r for r in (rows or []) if str(r.get("type") or "").strip().lower() in {"group", "user"}]
rows = [
r for r in (rows or []) if str(r.get("type") or "").strip().lower() in {"group", "user"}
]
except Exception:
pass
if not rows:
log("No Telegram groups/users available (or not logged in). Run: .telegram -login", file=sys.stderr)
log(
"No Telegram groups/users available (or not logged in). Run: .telegram -login",
file=sys.stderr,
)
return 0
table = ResultTable("Telegram Chats")
@@ -313,8 +325,18 @@ CMDLET = Cmdlet(
summary="Telegram login and chat listing",
usage="@N | .telegram (pick a chat, then send piped files)",
arg=[
CmdletArg(name="login", type="bool", description="Create/refresh a Telegram session (prompts)", required=False),
CmdletArg(name="send", type="bool", description="(internal) Send to selected chat(s)", required=False),
CmdletArg(
name="login",
type="bool",
description="Create/refresh a Telegram session (prompts)",
required=False,
),
CmdletArg(
name="send",
type="bool",
description="(internal) Send to selected chat(s)",
required=False,
),
],
exec=_run,
)
+25 -5
View File
@@ -1,4 +1,5 @@
"""Worker cmdlet: Display workers table in ResultTable format."""
from __future__ import annotations
import sys
@@ -21,11 +22,26 @@ CMDLET = Cmdlet(
summary="Display workers table in result table format.",
usage=".worker [status] [-limit N] [@N]",
arg=[
CmdletArg("status", description="Filter by status: running, completed, error (default: all)", requires_db=True),
CmdletArg("limit", type="integer", description="Limit results (default: 100)", requires_db=True),
CmdletArg("@N", description="Select worker by index (1-based) and display full logs", requires_db=True),
CmdletArg(
"status",
description="Filter by status: running, completed, error (default: all)",
requires_db=True,
),
CmdletArg(
"limit", type="integer", description="Limit results (default: 100)", requires_db=True
),
CmdletArg(
"@N",
description="Select worker by index (1-based) and display full logs",
requires_db=True,
),
CmdletArg("-id", description="Show full logs for a specific worker", requires_db=True),
CmdletArg("-clear", type="flag", description="Remove completed workers from the database", requires_db=True),
CmdletArg(
"-clear",
type="flag",
description="Remove completed workers from the database",
requires_db=True,
),
],
detail=[
"- Shows all background worker tasks and their output",
@@ -188,7 +204,11 @@ def _render_worker_selection(db, selected_items: Any) -> int:
continue
events: List[Dict[str, Any]] = []
try:
events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
events = (
db.get_worker_events(worker.get("worker_id"))
if hasattr(db, "get_worker_events")
else []
)
except Exception:
events = []
_emit_worker_detail(worker, events)
+25 -11
View File
@@ -1,6 +1,5 @@
""" """
"""
"""
from __future__ import annotations
import re
@@ -76,7 +75,9 @@ def _merge_dict_inplace(base: Dict[str, Any], patch: Dict[str, Any]) -> Dict[str
return base
def _apply_conf_block(config: Dict[str, Any], kind: str, subtype: str, block: Dict[str, Any]) -> None:
def _apply_conf_block(
config: Dict[str, Any], kind: str, subtype: str, block: Dict[str, Any]
) -> None:
kind_l = str(kind).strip().lower()
subtype_l = str(subtype).strip().lower()
@@ -290,12 +291,13 @@ def _serialize_conf(config: Dict[str, Any]) -> str:
def _make_cache_key(config_dir: Optional[Path], filename: str, actual_path: Optional[Path]) -> str:
if actual_path:
return str(actual_path.resolve())
base_dir = (config_dir or SCRIPT_DIR)
base_dir = config_dir or SCRIPT_DIR
return str((base_dir / filename).resolve())
def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> Optional[Dict[str, Any]]:
def get_hydrus_instance(
config: Dict[str, Any], instance_name: str = "home"
) -> Optional[Dict[str, Any]]:
"""Get a specific Hydrus instance config by name.
Supports multiple formats:
@@ -380,7 +382,6 @@ def get_soulseek_password(config: Dict[str, Any]) -> Optional[str]:
return str(val).strip() if val else None
def resolve_output_dir(config: Dict[str, Any]) -> Path:
"""Resolve output directory from config with single source of truth.
@@ -530,7 +531,9 @@ def get_provider_credentials(config: Dict[str, Any], provider: str) -> Optional[
return None
def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = None) -> Optional[Path]:
def resolve_cookies_path(
config: Dict[str, Any], script_dir: Optional[Path] = None
) -> Optional[Path]:
# Support both legacy top-level `cookies=...` and the modular conf style:
# [tool=ytdlp]
# cookies="C:\\path\\cookies.txt"
@@ -573,6 +576,7 @@ def resolve_cookies_path(config: Dict[str, Any], script_dir: Optional[Path] = No
return default_path
return None
def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
value = config.get("download_debug_log")
if not value:
@@ -582,7 +586,10 @@ def resolve_debug_log(config: Dict[str, Any]) -> Optional[Path]:
path = Path.cwd() / path
return path
def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
def load_config(
config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME
) -> Dict[str, Any]:
base_dir = config_dir or SCRIPT_DIR
config_path = base_dir / filename
cache_key = _make_cache_key(config_dir, filename, config_path)
@@ -608,7 +615,9 @@ def load_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFI
return data
def reload_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME) -> Dict[str, Any]:
def reload_config(
config_dir: Optional[Path] = None, filename: str = DEFAULT_CONFIG_FILENAME
) -> Dict[str, Any]:
cache_key = _make_cache_key(config_dir, filename, None)
_CONFIG_CACHE.pop(cache_key, None)
return load_config(config_dir=config_dir, filename=filename)
@@ -617,6 +626,7 @@ def reload_config(config_dir: Optional[Path] = None, filename: str = DEFAULT_CON
def clear_config_cache() -> None:
_CONFIG_CACHE.clear()
def save_config(
config: Dict[str, Any],
config_dir: Optional[Path] = None,
@@ -626,7 +636,9 @@ def save_config(
config_path = base_dir / filename
if config_path.suffix.lower() != ".conf":
raise RuntimeError(f"Unsupported config format: {config_path.name} (only .conf is supported)")
raise RuntimeError(
f"Unsupported config format: {config_path.name} (only .conf is supported)"
)
try:
config_path.write_text(_serialize_conf(config), encoding="utf-8")
@@ -636,10 +648,12 @@ def save_config(
cache_key = _make_cache_key(config_dir, filename, config_path)
_CONFIG_CACHE[cache_key] = config
def load() -> Dict[str, Any]:
"""Return the parsed downlow configuration."""
return load_config()
def save(config: Dict[str, Any]) -> None:
"""Persist *config* back to disk."""
save_config(config)
+234
View File
@@ -0,0 +1,234 @@
# get-url Architecture & Flow
## Overview
The enhanced `get-url` command supports two modes:
```
get-url
├── SEARCH MODE (new)
│ └── -url "pattern"
│ ├── Normalize pattern (strip protocol, www)
│ ├── Search all stores
│ ├── Match URLs with wildcards
│ └── Return grouped results
└── ORIGINAL MODE (unchanged)
├── Hash lookup
├── Store lookup
└── Return URLs for file
```
## Flow Diagram: URL Search
```
User Input
v
get-url -url "youtube.com*"
v
_normalize_url_for_search()
│ Strips: https://, http://, www.
│ Result: "youtube.com*" (unchanged, already normalized)
v
_search_urls_across_stores()
├─→ Store 1 (Hydrus)
│ ├─→ search("*", limit=1000)
│ ├─→ get_url(file_hash) for each file
│ └─→ _match_url_pattern() for each URL
├─→ Store 2 (Folder)
│ ├─→ search("*", limit=1000)
│ ├─→ get_url(file_hash) for each file
│ └─→ _match_url_pattern() for each URL
└─→ ...more stores...
Matching URLs:
├─→ https://www.youtube.com/watch?v=123
├─→ http://youtube.com/shorts/abc
└─→ https://youtube.com/playlist?list=xyz
Normalized for matching:
├─→ youtube.com/watch?v=123 ✓ Matches "youtube.com*"
├─→ youtube.com/shorts/abc ✓ Matches "youtube.com*"
└─→ youtube.com/playlist?... ✓ Matches "youtube.com*"
v
Collect UrlItem results
├─→ UrlItem(url="https://www.youtube.com/watch?v=123",
│ hash="abcd1234...", store="hydrus")
├─→ UrlItem(url="http://youtube.com/shorts/abc",
│ hash="efgh5678...", store="folder")
└─→ ...more items...
v
Group by store
├─→ Hydrus
│ ├─→ https://www.youtube.com/watch?v=123
│ └─→ ...
└─→ Folder
├─→ http://youtube.com/shorts/abc
└─→ ...
v
Emit UrlItem objects for piping
v
Return exit code 0 (success)
```
## Code Structure
```
Get_Url (class)
├── __init__()
│ └── Register command with CLI
├── _normalize_url_for_search() [static]
│ └── Strip protocol & www, lowercase
├── _match_url_pattern() [static]
│ └── fnmatch with normalization
├── _search_urls_across_stores() [instance]
│ ├── Iterate stores
│ ├── Search files in store
│ ├── Get URLs for each file
│ ├── Apply pattern matching
│ └── Return (items, stores_found)
└── run() [main execution]
├── Check for -url flag
│ ├── YES: Search mode
│ │ └── _search_urls_across_stores()
│ └── NO: Original mode
│ └── Hash+store lookup
└── Return exit code
```
## Data Flow Examples
### Example 1: Search by Domain
```
Input: get-url -url "www.google.com"
Normalize: "google.com" (www. stripped)
Search Results:
Store "hydrus":
- https://www.google.com ✓
- https://google.com/search?q=hello ✓
- https://google.com/maps ✓
Store "folder":
- http://google.com ✓
- https://google.com/images ✓
Output: 5 matching URLs grouped by store
```
### Example 2: Wildcard Pattern
```
Input: get-url -url "youtube.com/watch*"
Pattern: "youtube.com/watch*"
Search Results:
Store "hydrus":
- https://www.youtube.com/watch?v=123 ✓
- https://youtube.com/watch?list=abc ✓
- https://www.youtube.com/shorts/xyz ✗ (doesn't match /watch*)
Store "folder":
- http://youtube.com/watch?v=456 ✓
Output: 3 matching URLs (watch only, not shorts)
```
### Example 3: Subdomain Wildcard
```
Input: get-url -url "*.example.com*"
Normalize: "*.example.com*" (already normalized)
Search Results:
Store "hydrus":
- https://cdn.example.com/video.mp4 ✓
- https://api.example.com/endpoint ✓
- https://www.example.com ✓
- https://other.org ✗
Output: 3 matching URLs
```
## Integration with Piping
```
# Search → Filter → Add Tag
get-url -url "youtube.com*" | add-tag -tag "video-source"
# Search → Count
get-url -url "reddit.com*" | wc -l
# Search → Export
get-url -url "github.com*" > github_urls.txt
```
## Error Handling Flow
```
get-url -url "pattern"
├─→ No stores configured?
│ └─→ Log "Error: No stores configured"
│ └─→ Return exit code 1
├─→ Store search fails?
│ └─→ Log error, skip store, continue
├─→ No matches found?
│ └─→ Log "No urls matching pattern"
│ └─→ Return exit code 1
└─→ Matches found?
└─→ Return exit code 0
```
## Performance Considerations
1. **Store Iteration**: Loops through all configured stores
2. **File Scanning**: Each store searches up to 1000 files
3. **URL Matching**: Each URL is tested against the pattern (`fnmatch`, O(n) per URL)
4. **Memory**: Stores all matching items in memory before display
Optimization opportunities:
- Cache store results
- Limit search scope with --store flag
- Early exit with --limit N
- Pagination support
## Backward Compatibility
Original mode (unchanged):
```
@1 | get-url
└─→ No -url flag
└─→ Use original logic
├─→ Get hash from result
├─→ Get store from result or args
├─→ Call backend.get_url(hash)
└─→ Return URLs for that file
```
All original functionality is preserved; the new `-url` flag is purely additive.
+76
View File
@@ -0,0 +1,76 @@
# Quick Reference: get-url URL Search
## Basic Syntax
```bash
# Search mode (new)
get-url -url "pattern"
# Original mode (unchanged)
@1 | get-url
```
## Examples
### Exact domain match
```bash
get-url -url "google.com"
```
Matches: `https://www.google.com`, `http://google.com/search`, `https://google.com/maps`
### YouTube URL search
```bash
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
```
Normalizes to: `youtube.com/watch?v=xx_88tdwmes`
Matches: Any stored URL for the same video ID, regardless of protocol or `www.` prefix
### Wildcard domain
```bash
get-url -url "youtube.com*"
```
Matches: All YouTube URLs (videos, shorts, playlists, etc.)
### Subdomain wildcard
```bash
get-url -url "*.example.com*"
```
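Matches: `cdn.example.com`, `api.example.com`, `www.example.com`
Note: because `www.` is stripped during normalization, `www.example.com` is compared as `example.com`; if the bare domain fails to match, use `*example.com*` (no leading dot) instead.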
### Specific path pattern
```bash
get-url -url "youtube.com/watch*"
```
Matches: Only YouTube watch URLs (not shorts or playlists)
### Single character wildcard
```bash
get-url -url "example.com/file?.mp4"
```
Matches: `example.com/file1.mp4`, `example.com/fileA.mp4` (not `file12.mp4`)
## How It Works
1. **Normalization**: Strips the protocol (`https://`, `http://`, etc.) and the `www.` prefix from both the pattern and all URLs
2. **Pattern Matching**: Uses `*` and `?` wildcards (case-insensitive)
3. **Search**: Scans all configured stores for matching URLs
4. **Results**: Groups matches by store, shows URL and hash
## Return Values
- Exit code **0** if matches found
- Exit code **1** if no matches or error
## Piping Results
```bash
get-url -url "youtube.com*" | grep -i video
get-url -url "example.com*" | add-tag -tag "external-source"
```
## Common Patterns
| Pattern | Matches | Notes |
|---------|---------|-------|
| `google.com` | Google URLs | Exact domain (after normalization) |
| `youtube.com*` | All YouTube | Wildcard at end |
| `*.example.com*` | Subdomains | Wildcard at start and end |
| `github.com/user*` | User repos | Path pattern |
| `reddit.com/r/*` | Subreddit | Path with wildcard |
+91
View File
@@ -0,0 +1,91 @@
# get-url Enhanced URL Search
The `get-url` command now supports searching for URLs across all stores with automatic protocol and `www` prefix stripping.
## Features
### 1. **Protocol Stripping**
URLs are normalized by removing:
- Protocol prefixes: `https://`, `http://`, `ftp://`, etc.
- `www.` prefix (case-insensitive)
### 2. **Wildcard Matching**
Patterns support standard wildcards:
- `*` - matches any sequence of characters
- `?` - matches any single character
### 3. **Case-Insensitive Matching**
All matching is case-insensitive, for both domains and paths.
## Usage Examples
### Search by full domain
```bash
get-url -url "www.google.com"
# Matches:
# - https://www.google.com
# - http://google.com/search
# - https://google.com/maps
```
### Search with YouTube example
```bash
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
# Becomes: youtube.com/watch?v=xx_88tdwmes
# Matches:
# - https://www.youtube.com/watch?v=xx_88TDWmEs
# - http://youtube.com/watch?v=xx_88TDWmEs
```
### Domain wildcard matching
```bash
get-url -url "youtube.com*"
# Matches any URL starting with youtube.com:
# - https://www.youtube.com/watch?v=123
# - https://youtube.com/shorts/abc
# - http://youtube.com/playlist?list=xyz
```
### Subdomain matching
```bash
get-url -url "*example.com*"
# Matches:
# - https://cdn.example.com/file.mp4
# - https://www.example.com
# - https://api.example.com/endpoint
```
### Specific path matching
```bash
get-url -url "youtube.com/watch*"
# Matches:
# - https://www.youtube.com/watch?v=123
# - http://youtube.com/watch?list=abc
# Does NOT match:
# - https://youtube.com/shorts/abc
```
## Get URLs for Specific File
The original functionality is still supported:
```bash
@1 | get-url
# Requires hash and store from piped result
```
## Output
Results are organized by store and show:
- **Store**: Backend name (hydrus, folder, etc.)
- **Url**: The full matched URL
- **Hash**: First 16 characters of the file hash (for compactness)
## Implementation Details
The search:
1. Iterates through all configured stores
2. Searches for all files in each store (limit 1000 per store)
3. Retrieves URLs for each file
4. Applies pattern matching with normalization
5. Returns results grouped by store
6. Emits `UrlItem` objects for piping to other commands
+1
View File
@@ -5,6 +5,7 @@ packaged distribution the preferred entry is `medeia_macina.cli_entry.main`.
When running from the repository (or in legacy installs) the module will
attempt to import `MedeiaCLI` from the top-level `CLI` module.
"""
import sys
from pathlib import Path
+3
View File
@@ -3,6 +3,7 @@
This wraps the existing `medeia_entry.py` runner so installers can set
entry points to `medeia_macina.cli_entry:main`.
"""
from __future__ import annotations
from typing import Optional, List, Tuple
@@ -196,6 +197,7 @@ def _run_cli(clean_args: List[str]) -> int:
try:
_ensure_repo_root_on_sys_path()
from CLI import MedeiaCLI as _M # type: ignore
MedeiaCLI = _M
except Exception:
raise ImportError(
@@ -261,6 +263,7 @@ def main(argv: Optional[List[str]] = None) -> int:
# exit early to avoid confusing import-time errors later during startup.
try:
from SYS.env_check import ensure_urllib3_ok
try:
ensure_urllib3_ok(exit_on_error=True)
except SystemExit as exc:
+789 -587
View File
File diff suppressed because it is too large Load Diff
+63 -15
View File
@@ -53,6 +53,7 @@ class PipeObject:
parent_hash: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
extra: Additional fields not covered above
"""
hash: str
store: str
provider: Optional[str] = None
@@ -114,7 +115,10 @@ class PipeObject:
cmdlet_name = "PipeObject"
try:
import pipeline as ctx
current = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
current = (
ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
)
if current:
cmdlet_name = current
else:
@@ -231,6 +235,7 @@ class FileRelationshipTracker:
self.add_alt(primary_path, alt_hash)
except Exception as e:
import sys
print(f"Error hashing {alt_path}: {e}", file=sys.stderr)
@@ -245,6 +250,7 @@ def _get_file_hash(filepath: str) -> str:
# ============= Download Module Classes =============
class DownloadError(RuntimeError):
"""Raised when the download or Hydrus import fails."""
@@ -255,6 +261,7 @@ class DownloadOptions:
Use the add-file cmdlet separately for Hydrus import.
"""
url: str
mode: str # "audio" or "video"
output_dir: Path
@@ -273,13 +280,14 @@ class DownloadOptions:
class SendFunc(Protocol):
"""Protocol for event sender function."""
def __call__(self, event: str, **payload: Any) -> None:
...
def __call__(self, event: str, **payload: Any) -> None: ...
@dataclass(slots=True)
class DownloadMediaResult:
"""Result of a successful media download."""
path: Path
info: Dict[str, Any]
tag: List[str]
@@ -291,6 +299,7 @@ class DownloadMediaResult:
@dataclass(slots=True)
class DebugLogger:
"""Logs events to a JSON debug file for troubleshooting downloads."""
path: Path
file: Optional[TextIO] = None
session_started: bool = False
@@ -383,8 +392,7 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in
if isinstance(value, (list, tuple, set)):
iterable = value if not isinstance(value, set) else list(value)
return [
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen)
for item in iterable
_sanitise_for_json(item, max_depth=max_depth - 1, _seen=_seen) for item in iterable
]
if is_dataclass(value) and not isinstance(value, type):
return _sanitise_for_json(asdict(value), max_depth=max_depth - 1, _seen=_seen)
@@ -393,6 +401,7 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in
return repr(value)
class ProgressBar:
"""Rich progress helper for byte-based transfers.
@@ -419,7 +428,9 @@ class ProgressBar:
# Pipeline-backed transfer task is already registered; update its total if needed.
try:
if total is not None and total > 0:
self._pipeline_ui.update_transfer(label=self._pipeline_label, completed=None, total=int(total))
self._pipeline_ui.update_transfer(
label=self._pipeline_label, completed=None, total=int(total)
)
except Exception:
pass
return
@@ -438,7 +449,10 @@ class ProgressBar:
self._pipeline_ui = ui
self._pipeline_label = str(label or "download")
try:
ui.begin_transfer(label=self._pipeline_label, total=int(total) if isinstance(total, int) and total > 0 else None)
ui.begin_transfer(
label=self._pipeline_label,
total=int(total) if isinstance(total, int) and total > 0 else None,
)
except Exception:
# If pipeline integration fails, fall back to standalone progress.
self._pipeline_ui = None
@@ -503,7 +517,9 @@ class ProgressBar:
if self._progress is None or self._task_id is None:
return
if total is not None and total > 0:
self._progress.update(self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True)
self._progress.update(
self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True
)
else:
self._progress.update(self._task_id, completed=int(downloaded or 0), refresh=True)
@@ -555,7 +571,14 @@ class ProgressFileReader:
Progress is written to stderr (so pipelines remain clean).
"""
def __init__(self, fileobj: Any, *, total_bytes: Optional[int], label: str = "upload", min_interval_s: float = 0.25):
def __init__(
self,
fileobj: Any,
*,
total_bytes: Optional[int],
label: str = "upload",
min_interval_s: float = 0.25,
):
self._f = fileobj
self._total = int(total_bytes) if total_bytes not in (None, 0, "") else 0
self._label = str(label or "upload")
@@ -574,7 +597,12 @@ class ProgressFileReader:
now = time.time()
if now - self._last < self._min_interval_s:
return
self._bar.update(downloaded=int(self._read), total=int(self._total), label=str(self._label or "upload"), file=sys.stderr)
self._bar.update(
downloaded=int(self._read),
total=int(self._total),
label=str(self._label or "upload"),
file=sys.stderr,
)
self._last = now
def _finish(self) -> None:
@@ -868,7 +896,13 @@ class PipelineLiveProgress:
return
if self._live is not None:
return
if self._console is None or self._pipe_progress is None or self._subtasks is None or self._transfers is None or self._overall is None:
if (
self._console is None
or self._pipe_progress is None
or self._subtasks is None
or self._transfers is None
or self._overall is None
):
# Not initialized yet; start fresh.
self.start()
return
@@ -1081,7 +1115,9 @@ class PipelineLiveProgress:
except Exception:
pass
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
def update_transfer(
self, *, label: str, completed: Optional[int], total: Optional[int] = None
) -> None:
if not self._enabled:
return
if self._transfers is None:
@@ -1123,7 +1159,9 @@ class PipelineLiveProgress:
return False
return True
def begin_pipe(self, pipe_index: int, *, total_items: int, items_preview: Optional[List[Any]] = None) -> None:
def begin_pipe(
self, pipe_index: int, *, total_items: int, items_preview: Optional[List[Any]] = None
) -> None:
if not self._ensure_pipe(pipe_index):
return
pipe_progress = self._pipe_progress
@@ -1321,7 +1359,11 @@ class PipelineLiveProgress:
if self._overall_task is not None:
completed = 0
try:
completed = sum(1 for i in range(len(self._pipe_labels)) if self._pipe_done[i] >= max(1, self._pipe_totals[i]))
completed = sum(
1
for i in range(len(self._pipe_labels))
if self._pipe_done[i] >= max(1, self._pipe_totals[i])
)
except Exception:
completed = 0
overall.update(
@@ -1330,6 +1372,7 @@ class PipelineLiveProgress:
description=f"Pipeline: {completed}/{len(self._pipe_labels)} pipes completed",
)
class PipelineStageContext:
"""Context information for the current pipeline stage."""
@@ -1343,7 +1386,7 @@ class PipelineStageContext:
):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.is_last_stage = stage_index == total_stages - 1
self.pipe_index = int(pipe_index) if pipe_index is not None else None
self.worker_id = worker_id
self._on_emit = on_emit
@@ -1377,6 +1420,7 @@ class PipelineStageContext:
# Consolidated from result_table.py
# ============================================================================
@dataclass
class InputOption:
"""Represents an interactive input option (cmdlet argument) in a table.
@@ -1397,6 +1441,7 @@ class InputOption:
table.add_input_option(location_opt)
selected = table.select_option("location") # Returns user choice
"""
name: str
"""Option name (maps to cmdlet argument)"""
type: str = "string"
@@ -1428,6 +1473,7 @@ class TUIResultCard:
Used in hub-ui and TUI contexts to render individual search results
as grouped components with visual structure.
"""
title: str
subtitle: Optional[str] = None
metadata: Optional[Dict[str, str]] = None
@@ -1448,6 +1494,7 @@ class TUIResultCard:
@dataclass
class ResultColumn:
"""Represents a single column in a result table."""
name: str
value: str
width: Optional[int] = None
@@ -1464,6 +1511,7 @@ class ResultColumn:
@dataclass
class ResultRow:
"""Represents a single row in a result table."""
columns: List[ResultColumn] = field(default_factory=list)
def add_column(self, name: str, value: Any) -> None:
+56 -29
View File
@@ -165,6 +165,7 @@ def clear_pipeline_stop() -> None:
# PUBLIC API
# ============================================================================
def set_stage_context(context: Optional[PipelineStageContext]) -> None:
"""Set the current pipeline stage context."""
global _CURRENT_CONTEXT
@@ -234,7 +235,9 @@ def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
"""
try:
# Print if: not in a pipeline OR this is the last stage
should_print = (_CURRENT_CONTEXT is None) or (_CURRENT_CONTEXT and _CURRENT_CONTEXT.is_last_stage)
should_print = (_CURRENT_CONTEXT is None) or (
_CURRENT_CONTEXT and _CURRENT_CONTEXT.is_last_stage
)
# Always print to stderr regardless
if file is not None:
@@ -284,7 +287,7 @@ def load_value(key: str, default: Any = None) -> Any:
text = key.strip()
if not text:
return default
parts = [segment.strip() for segment in text.split('.') if segment.strip()]
parts = [segment.strip() for segment in text.split(".") if segment.strip()]
if not parts:
return default
root_key = parts[0].lower()
@@ -329,7 +332,9 @@ def load_value(key: str, default: Any = None) -> Any:
return current
def set_pending_pipeline_tail(stages: Optional[Sequence[Sequence[str]]], source_command: Optional[str] = None) -> None:
def set_pending_pipeline_tail(
stages: Optional[Sequence[Sequence[str]]], source_command: Optional[str] = None
) -> None:
"""Store the remaining pipeline stages when execution pauses for @N selection.
Args:
@@ -367,8 +372,6 @@ def clear_pending_pipeline_tail() -> None:
_PENDING_PIPELINE_SOURCE = None
def reset() -> None:
"""Reset all pipeline state. Called between pipeline executions."""
global _PIPELINE_VALUES, _LAST_SEARCH_QUERY, _PIPELINE_REFRESHED
@@ -465,7 +468,7 @@ def split_pipeline_text(pipeline_text: str) -> List[str]:
escape = True
continue
if ch in ("\"", "'"):
if ch in ('"', "'"):
if quote is None:
quote = ch
elif quote == ch:
@@ -583,7 +586,7 @@ def get_ui_library_refresh_callback() -> Optional[Any]:
return _UI_LIBRARY_REFRESH_CALLBACK
def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
def trigger_ui_library_refresh(library_filter: str = "local") -> None:
"""Trigger a library refresh in the UI if callback is registered.
This should be called from cmdlet/funacts after content is added to library.
@@ -596,10 +599,14 @@ def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
try:
callback(library_filter)
except Exception as e:
print(f"[trigger_ui_library_refresh] Error calling refresh callback: {e}", file=sys.stderr)
print(
f"[trigger_ui_library_refresh] Error calling refresh callback: {e}", file=sys.stderr
)
def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None:
def set_last_result_table(
result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None
) -> None:
"""Store the last result table and items for @ selection syntax.
This should be called after displaying a result table, so users can reference
@@ -618,7 +625,9 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]
# Push current table to history before replacing
if _LAST_RESULT_TABLE is not None:
_RESULT_TABLE_HISTORY.append((_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS.copy(), _LAST_RESULT_SUBJECT))
_RESULT_TABLE_HISTORY.append(
(_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS.copy(), _LAST_RESULT_SUBJECT)
)
# Keep history size limited
if len(_RESULT_TABLE_HISTORY) > _MAX_RESULT_TABLE_HISTORY:
_RESULT_TABLE_HISTORY.pop(0)
@@ -632,14 +641,18 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]
_LAST_RESULT_SUBJECT = subject
# Sort table by Title/Name column alphabetically if available
if result_table is not None and hasattr(result_table, 'sort_by_title') and not getattr(result_table, 'preserve_order', False):
if (
result_table is not None
and hasattr(result_table, "sort_by_title")
and not getattr(result_table, "preserve_order", False)
):
try:
result_table.sort_by_title()
# Re-order items list to match the sorted table
if _LAST_RESULT_ITEMS and hasattr(result_table, 'rows'):
if _LAST_RESULT_ITEMS and hasattr(result_table, "rows"):
sorted_items = []
for row in result_table.rows:
src_idx = getattr(row, 'source_index', None)
src_idx = getattr(row, "source_index", None)
if isinstance(src_idx, int) and 0 <= src_idx < len(_LAST_RESULT_ITEMS):
sorted_items.append(_LAST_RESULT_ITEMS[src_idx])
if len(sorted_items) == len(result_table.rows):
@@ -648,7 +661,9 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]
pass
def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None:
def set_last_result_table_overlay(
result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None
) -> None:
"""Set a result table as an overlay (display only, no history).
Used for commands like get-tag that want to show a formatted table but
@@ -665,14 +680,18 @@ def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[L
_DISPLAY_SUBJECT = subject
# Sort table by Title/Name column alphabetically if available
if result_table is not None and hasattr(result_table, 'sort_by_title') and not getattr(result_table, 'preserve_order', False):
if (
result_table is not None
and hasattr(result_table, "sort_by_title")
and not getattr(result_table, "preserve_order", False)
):
try:
result_table.sort_by_title()
# Re-order items list to match the sorted table
if _DISPLAY_ITEMS and hasattr(result_table, 'rows'):
if _DISPLAY_ITEMS and hasattr(result_table, "rows"):
sorted_items = []
for row in result_table.rows:
src_idx = getattr(row, 'source_index', None)
src_idx = getattr(row, "source_index", None)
if isinstance(src_idx, int) and 0 <= src_idx < len(_DISPLAY_ITEMS):
sorted_items.append(_DISPLAY_ITEMS[src_idx])
if len(sorted_items) == len(result_table.rows):
@@ -681,7 +700,9 @@ def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[L
pass
def set_last_result_table_preserve_history(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None:
def set_last_result_table_preserve_history(
result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None
) -> None:
"""Update the last result table WITHOUT adding to history.
Used for action commands (delete-tag, add-tags, etc.) that modify data but shouldn't
@@ -794,7 +815,11 @@ def restore_next_result_table() -> bool:
# Pop from forward stack and restore
next_state = _RESULT_TABLE_FORWARD.pop()
if isinstance(next_state, tuple) and len(next_state) >= 3:
_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT = next_state[0], next_state[1], next_state[2]
_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT = (
next_state[0],
next_state[1],
next_state[2],
)
elif isinstance(next_state, tuple) and len(next_state) == 2:
_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS = next_state
_LAST_RESULT_SUBJECT = None
@@ -880,7 +905,7 @@ def get_last_result_table_source_command() -> Optional[str]:
Returns:
Command name (e.g., 'download-file') or None if not set
"""
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'source_command'):
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, "source_command"):
return _LAST_RESULT_TABLE.source_command
return None
@@ -891,7 +916,7 @@ def get_last_result_table_source_args() -> List[str]:
Returns:
List of arguments (e.g., ['https://example.com']) or empty list
"""
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'source_args'):
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, "source_args"):
return _LAST_RESULT_TABLE.source_args or []
return []
@@ -905,10 +930,10 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
Returns:
Selection arguments (e.g., ['-item', '3']) or None
"""
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'rows'):
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, "rows"):
if 0 <= row_index < len(_LAST_RESULT_TABLE.rows):
row = _LAST_RESULT_TABLE.rows[row_index]
if hasattr(row, 'selection_args'):
if hasattr(row, "selection_args"):
return row.selection_args
return None
@@ -939,7 +964,9 @@ def get_current_stage_table_source_command() -> Optional[str]:
Returns:
Command name (e.g., 'download-file') or None
"""
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'source_command'):
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(
_CURRENT_STAGE_TABLE, "source_command"
):
return _CURRENT_STAGE_TABLE.source_command
return None
@@ -950,7 +977,7 @@ def get_current_stage_table_source_args() -> List[str]:
Returns:
List of arguments or empty list
"""
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'source_args'):
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, "source_args"):
return _CURRENT_STAGE_TABLE.source_args or []
return []
@@ -964,10 +991,10 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[
Returns:
Selection arguments or None
"""
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'rows'):
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, "rows"):
if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows):
row = _CURRENT_STAGE_TABLE.rows[row_index]
if hasattr(row, 'selection_args'):
if hasattr(row, "selection_args"):
return row.selection_args
return None
@@ -978,10 +1005,10 @@ def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]:
Useful when the table has been sorted for display but selections should map
back to the original item order (e.g., playlist or provider order).
"""
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'rows'):
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, "rows"):
if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows):
row = _CURRENT_STAGE_TABLE.rows[row_index]
return getattr(row, 'source_index', None)
return getattr(row, "source_index", None)
return None
+216 -127
View File
@@ -9,6 +9,7 @@ Features:
- Interactive selection with user input
- Input options for cmdlet arguments (location, source selection, etc)
"""
from __future__ import annotations
from dataclasses import dataclass, field
@@ -27,6 +28,7 @@ from rich.text import Text
# Optional Textual imports - graceful fallback if not available
try:
from textual.widgets import Tree
TEXTUAL_AVAILABLE = True
except ImportError:
TEXTUAL_AVAILABLE = False
@@ -39,13 +41,7 @@ def _sanitize_cell_text(value: Any) -> str:
text = str(value)
if not text:
return ""
return (
text
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
return text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ").replace("\t", " ")
def _format_duration_hms(duration: Any) -> str:
@@ -133,7 +129,9 @@ def _as_dict(item: Any) -> Optional[Dict[str, Any]]:
def extract_store_value(item: Any) -> str:
    """Return the store/backend name found on *item* as a stripped string.

    The item is converted with ``_as_dict``; when that yields a falsy
    result an empty dict is used instead. The value is whatever
    ``_get_first_dict_value`` selects for the keys ``store``, ``table``,
    ``source`` and ``storage``.

    Returns:
        The selected value coerced with ``str`` and stripped of surrounding
        whitespace, or ``""`` when no truthy value is found.
    """
    data = _as_dict(item) or {}
    # "storage" is the legacy key name and is listed last.
    store = _get_first_dict_value(data, ["store", "table", "source", "storage"])
    return str(store or "").strip()
@@ -157,9 +155,8 @@ def extract_ext_value(item: Any) -> str:
meta = data.get("metadata") if isinstance(data.get("metadata"), dict) else {}
raw_path = data.get("path") or data.get("target") or data.get("filename") or data.get("title")
ext = (
_get_first_dict_value(data, ["ext", "file_ext", "extension"])
or _get_first_dict_value(meta, ["ext", "file_ext", "extension"])
ext = _get_first_dict_value(data, ["ext", "file_ext", "extension"]) or _get_first_dict_value(
meta, ["ext", "file_ext", "extension"]
)
if (not ext) and raw_path:
@@ -182,10 +179,9 @@ def extract_size_bytes_value(item: Any) -> Optional[int]:
data = _as_dict(item) or {}
meta = data.get("metadata") if isinstance(data.get("metadata"), dict) else {}
size_val = (
_get_first_dict_value(data, ["size_bytes", "size", "file_size", "bytes", "filesize"])
or _get_first_dict_value(meta, ["size_bytes", "size", "file_size", "bytes", "filesize"])
)
size_val = _get_first_dict_value(
data, ["size_bytes", "size", "file_size", "bytes", "filesize"]
) or _get_first_dict_value(meta, ["size_bytes", "size", "file_size", "bytes", "filesize"])
if size_val is None:
return None
try:
@@ -239,6 +235,7 @@ class InputOption:
table.add_input_option(location_opt)
selected = table.select_option("location") # Returns user choice
"""
name: str
"""Option name (maps to cmdlet argument)"""
type: str = "string"
@@ -270,6 +267,7 @@ class TUIResultCard:
Used in hub-ui and TUI contexts to render individual search results
as grouped components with visual structure.
"""
title: str
subtitle: Optional[str] = None
metadata: Optional[Dict[str, str]] = None
@@ -290,6 +288,7 @@ class TUIResultCard:
@dataclass
class ResultColumn:
"""Represents a single column in a result table."""
name: str
value: str
width: Optional[int] = None
@@ -306,6 +305,7 @@ class ResultColumn:
@dataclass
class ResultRow:
"""Represents a single row in a result table."""
columns: List[ResultColumn] = field(default_factory=list)
selection_args: Optional[List[str]] = None
"""Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])"""
@@ -374,7 +374,13 @@ class ResultTable:
>>> print(result_table)
"""
def __init__(self, title: str = "", title_width: int = 80, max_columns: Optional[int] = None, preserve_order: bool = False):
def __init__(
self,
title: str = "",
title_width: int = 80,
max_columns: Optional[int] = None,
preserve_order: bool = False,
):
"""Initialize a result table.
Args:
@@ -389,13 +395,19 @@ class ResultTable:
cmdlet_name = ""
try:
cmdlet_name = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
cmdlet_name = (
ctx.get_current_cmdlet_name("")
if hasattr(ctx, "get_current_cmdlet_name")
else ""
)
except Exception:
cmdlet_name = ""
stage_text = ""
try:
stage_text = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else ""
stage_text = (
ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else ""
)
except Exception:
stage_text = ""
@@ -404,12 +416,16 @@ class ResultTable:
normalized_title = str(self.title or "").strip().lower()
normalized_stage = str(stage_text).strip()
if normalized_stage and normalized_stage.lower().startswith(normalized_cmd):
if (not normalized_title) or normalized_title.replace("_", "-").startswith(normalized_cmd):
if (not normalized_title) or normalized_title.replace("_", "-").startswith(
normalized_cmd
):
self.title = normalized_stage
except Exception:
pass
self.title_width = title_width
self.max_columns = max_columns if max_columns is not None else 5 # Default 5 for cleaner display
self.max_columns = (
max_columns if max_columns is not None else 5
) # Default 5 for cleaner display
self.rows: List[ResultRow] = []
self.column_widths: Dict[str, int] = {}
self.input_options: Dict[str, InputOption] = {}
@@ -486,7 +502,13 @@ class ResultTable:
self.source_args = args or []
return self
def init_command(self, title: str, command: str, args: Optional[List[str]] = None, preserve_order: bool = False) -> "ResultTable":
def init_command(
self,
title: str,
command: str,
args: Optional[List[str]] = None,
preserve_order: bool = False,
) -> "ResultTable":
"""Initialize table with title, command, args, and preserve_order in one call.
Consolidates common initialization pattern: ResultTable(title) + set_source_command(cmd, args) + set_preserve_order(preserve_order)
@@ -522,7 +544,7 @@ class ResultTable:
title=new_title,
title_width=self.title_width,
max_columns=self.max_columns,
preserve_order=self.preserve_order
preserve_order=self.preserve_order,
)
new_table.source_command = self.source_command
new_table.source_args = list(self.source_args) if self.source_args else []
@@ -554,7 +576,12 @@ class ResultTable:
"""Attach a single metadata line beneath the title."""
return self.set_header_lines([line] if line else [])
def set_storage_summary(self, storage_counts: Dict[str, int], filter_text: Optional[str] = None, inline: bool = False) -> str:
def set_storage_summary(
self,
storage_counts: Dict[str, int],
filter_text: Optional[str] = None,
inline: bool = False,
) -> str:
"""Render a storage count summary (e.g., "Hydrus:0 Local:1 | filter: \"q\"").
Returns the summary string so callers can place it inline with the title if desired.
@@ -562,10 +589,12 @@ class ResultTable:
summary_parts: List[str] = []
if storage_counts:
summary_parts.append(" ".join(f"{name}:{count}" for name, count in storage_counts.items()))
summary_parts.append(
" ".join(f"{name}:{count}" for name, count in storage_counts.items())
)
if filter_text:
safe_filter = filter_text.replace("\"", "\\\"")
safe_filter = filter_text.replace('"', '\\"')
summary_parts.append(f'filter: "{safe_filter}"')
summary = " | ".join(summary_parts)
@@ -602,7 +631,11 @@ class ResultTable:
return self
# Sort rows by the title column value (case-insensitive)
self.rows.sort(key=lambda row: row.columns[title_col_idx].value.lower() if title_col_idx < len(row.columns) else "")
self.rows.sort(
key=lambda row: (
row.columns[title_col_idx].value.lower() if title_col_idx < len(row.columns) else ""
)
)
return self
@@ -618,22 +651,22 @@ class ResultTable:
row = self.add_row()
# Handle TagItem from get_tag.py (tag display with index)
if hasattr(result, '__class__') and result.__class__.__name__ == 'TagItem':
if hasattr(result, "__class__") and result.__class__.__name__ == "TagItem":
self._add_tag_item(row, result)
# Handle ResultItem from search_file.py (compact display)
elif hasattr(result, '__class__') and result.__class__.__name__ == 'ResultItem':
elif hasattr(result, "__class__") and result.__class__.__name__ == "ResultItem":
self._add_result_item(row, result)
# Handle SearchResult from search_file.py
elif hasattr(result, '__class__') and result.__class__.__name__ == 'SearchResult':
elif hasattr(result, "__class__") and result.__class__.__name__ == "SearchResult":
self._add_search_result(row, result)
# Handle PipeObject from models.py
elif hasattr(result, '__class__') and result.__class__.__name__ == 'PipeObject':
elif hasattr(result, "__class__") and result.__class__.__name__ == "PipeObject":
self._add_pipe_object(row, result)
# Handle dict
elif isinstance(result, dict):
self._add_dict(row, result)
# Handle generic objects with __dict__
elif hasattr(result, '__dict__'):
elif hasattr(result, "__dict__"):
self._add_generic_object(row, result)
# Handle strings (simple text result)
elif isinstance(result, str):
@@ -651,15 +684,15 @@ class ResultTable:
return
# Core fields (legacy fallback)
title = getattr(result, 'title', '')
table = str(getattr(result, 'table', '') or '').lower()
title = getattr(result, "title", "")
table = str(getattr(result, "table", "") or "").lower()
# Handle extension separation for local files
extension = ""
if title and table == 'local':
if title and table == "local":
path_obj = Path(title)
if path_obj.suffix:
extension = path_obj.suffix.lstrip('.')
extension = path_obj.suffix.lstrip(".")
title = path_obj.stem
if title:
@@ -668,30 +701,30 @@ class ResultTable:
# Extension column
row.add_column("Ext", extension)
if hasattr(result, 'table') and getattr(result, 'table', None):
row.add_column("Source", str(getattr(result, 'table')))
if hasattr(result, "table") and getattr(result, "table", None):
row.add_column("Source", str(getattr(result, "table")))
if hasattr(result, 'detail') and result.detail:
if hasattr(result, "detail") and result.detail:
row.add_column("Detail", result.detail)
if hasattr(result, 'media_kind') and result.media_kind:
if hasattr(result, "media_kind") and result.media_kind:
row.add_column("Type", result.media_kind)
# Tag summary
if hasattr(result, 'tag_summary') and result.tag_summary:
if hasattr(result, "tag_summary") and result.tag_summary:
row.add_column("Tag", str(result.tag_summary))
# Duration (for media)
if hasattr(result, 'duration_seconds') and result.duration_seconds:
if hasattr(result, "duration_seconds") and result.duration_seconds:
dur = _format_duration_hms(result.duration_seconds)
row.add_column("Duration", dur or str(result.duration_seconds))
# Size (for files)
if hasattr(result, 'size_bytes') and result.size_bytes:
if hasattr(result, "size_bytes") and result.size_bytes:
row.add_column("Size", _format_size(result.size_bytes, integer_only=False))
# Annotations
if hasattr(result, 'annotations') and result.annotations:
if hasattr(result, "annotations") and result.annotations:
row.add_column("Annotations", ", ".join(str(a) for a in result.annotations))
def _add_result_item(self, row: ResultRow, item: Any) -> None:
@@ -707,16 +740,16 @@ class ResultTable:
Use @row# syntax to pipe full item data to next command.
"""
# Title (required)
title = getattr(item, 'title', None) or 'Unknown'
table = str(getattr(item, 'table', '') or getattr(item, 'store', '') or '').lower()
title = getattr(item, "title", None) or "Unknown"
table = str(getattr(item, "table", "") or getattr(item, "store", "") or "").lower()
# Handle extension separation for local files
extension = ""
if title and table == 'local':
if title and table == "local":
# Try to split extension
path_obj = Path(title)
if path_obj.suffix:
extension = path_obj.suffix.lstrip('.')
extension = path_obj.suffix.lstrip(".")
title = path_obj.stem
if title:
@@ -726,13 +759,13 @@ class ResultTable:
row.add_column("Ext", extension)
# Storage (source backend - hydrus, local, debrid, etc)
if getattr(item, 'table', None):
row.add_column("Storage", str(getattr(item, 'table')))
elif getattr(item, 'store', None):
row.add_column("Storage", str(getattr(item, 'store')))
if getattr(item, "table", None):
row.add_column("Storage", str(getattr(item, "table")))
elif getattr(item, "store", None):
row.add_column("Storage", str(getattr(item, "store")))
# Size (for files)
if hasattr(item, 'size_bytes') and item.size_bytes:
if hasattr(item, "size_bytes") and item.size_bytes:
row.add_column("Size", _format_size(item.size_bytes, integer_only=False))
def _add_tag_item(self, row: ResultRow, item: Any) -> None:
@@ -744,42 +777,41 @@ class ResultTable:
Tag row selection is handled by the CLI pipeline (e.g. `@N | ...`).
"""
# Tag name
if hasattr(item, 'tag_name') and item.tag_name:
if hasattr(item, "tag_name") and item.tag_name:
row.add_column("Tag", item.tag_name)
# Source/Store (where the tag values come from)
if hasattr(item, 'source') and item.source:
if hasattr(item, "source") and item.source:
row.add_column("Store", item.source)
def _add_pipe_object(self, row: ResultRow, obj: Any) -> None:
"""Extract and add PipeObject fields to row."""
# Source and identifier
if hasattr(obj, 'source') and obj.source:
if hasattr(obj, "source") and obj.source:
row.add_column("Source", obj.source)
# Title
if hasattr(obj, 'title') and obj.title:
if hasattr(obj, "title") and obj.title:
row.add_column("Title", obj.title)
# File info
if hasattr(obj, 'path') and obj.path:
if hasattr(obj, "path") and obj.path:
row.add_column("Path", str(obj.path))
# Tag
if hasattr(obj, 'tag') and obj.tag:
if hasattr(obj, "tag") and obj.tag:
tag_str = ", ".join(obj.tag[:3]) # First 3 tag values
if len(obj.tag) > 3:
tag_str += f", +{len(obj.tag) - 3} more"
row.add_column("Tag", tag_str)
# Duration
if hasattr(obj, 'duration') and obj.duration:
if hasattr(obj, "duration") and obj.duration:
dur = _format_duration_hms(obj.duration)
row.add_column("Duration", dur or str(obj.duration))
# Warnings
if hasattr(obj, 'warnings') and obj.warnings:
if hasattr(obj, "warnings") and obj.warnings:
warnings_str = "; ".join(obj.warnings[:2])
if len(obj.warnings) > 2:
warnings_str += f" (+{len(obj.warnings) - 2} more)"
@@ -800,15 +832,27 @@ class ResultTable:
- size | size_bytes
- ext
"""
# Helper to determine if a field should be hidden from display
def is_hidden_field(field_name: Any) -> bool:
# Hide internal/metadata fields
hidden_fields = {
'__', 'id', 'action', 'parent_id', 'is_temp', 'path', 'extra',
'target', 'hash', 'hash_hex', 'file_hash', 'tag', 'tag_summary'
"__",
"id",
"action",
"parent_id",
"is_temp",
"path",
"extra",
"target",
"hash",
"hash_hex",
"file_hash",
"tag",
"tag_summary",
}
if isinstance(field_name, str):
if field_name.startswith('__'):
if field_name.startswith("__"):
return True
if field_name in hidden_fields:
return True
@@ -821,7 +865,12 @@ class ResultTable:
# This keeps Ext/Size/Store consistent across all dict-based result sources.
try:
store_extracted = extract_store_value(data)
if store_extracted and "store" not in visible_data and "table" not in visible_data and "source" not in visible_data:
if (
store_extracted
and "store" not in visible_data
and "table" not in visible_data
and "source" not in visible_data
):
visible_data["store"] = store_extracted
except Exception:
pass
@@ -835,34 +884,44 @@ class ResultTable:
try:
size_extracted = extract_size_bytes_value(data)
if size_extracted is not None and "size_bytes" not in visible_data and "size" not in visible_data:
if (
size_extracted is not None
and "size_bytes" not in visible_data
and "size" not in visible_data
):
visible_data["size_bytes"] = size_extracted
except Exception:
pass
# Handle extension separation for local files
store_val = str(visible_data.get('store', '') or visible_data.get('table', '') or visible_data.get('source', '')).lower()
store_val = str(
visible_data.get("store", "")
or visible_data.get("table", "")
or visible_data.get("source", "")
).lower()
# Debug logging
# print(f"DEBUG: Processing dict result. Store: {store_val}, Keys: {list(visible_data.keys())}")
if store_val == 'local':
if store_val == "local":
# Find title field
title_field = next((f for f in ['title', 'name', 'filename'] if f in visible_data), None)
title_field = next(
(f for f in ["title", "name", "filename"] if f in visible_data), None
)
if title_field:
title_val = str(visible_data[title_field])
path_obj = Path(title_val)
if path_obj.suffix:
extension = path_obj.suffix.lstrip('.')
extension = path_obj.suffix.lstrip(".")
visible_data[title_field] = path_obj.stem
visible_data['ext'] = extension
visible_data["ext"] = extension
# print(f"DEBUG: Split extension. Title: {visible_data[title_field]}, Ext: {extension}")
else:
visible_data['ext'] = ""
visible_data["ext"] = ""
# Ensure 'ext' is present so it gets picked up by priority_groups in correct order
if 'ext' not in visible_data:
visible_data['ext'] = ""
if "ext" not in visible_data:
visible_data["ext"] = ""
# Track which fields we've already added to avoid duplicates
added_fields = set()
@@ -879,11 +938,15 @@ class ResultTable:
# Special handling for 'columns' field from search providers
# If present, use it to populate row columns dynamically
if 'columns' in visible_data and isinstance(visible_data['columns'], list) and visible_data['columns']:
if (
"columns" in visible_data
and isinstance(visible_data["columns"], list)
and visible_data["columns"]
):
try:
for col_name, col_value in visible_data['columns']:
for col_name, col_value in visible_data["columns"]:
# Skip the "#" column as ResultTable already adds row numbers
if col_name == '#':
if col_name == "#":
continue
if column_count >= self.max_columns:
break
@@ -911,17 +974,17 @@ class ResultTable:
added_fields.add(col_name.lower())
column_count += 1
# Mark 'columns' as handled so we don't add it as a field
added_fields.add('columns')
added_fields.add("columns")
# Also mark common fields that shouldn't be re-displayed if they're in columns
# This prevents showing both "Store" (from columns) and "Store" (from data fields)
added_fields.add('table')
added_fields.add('source')
added_fields.add('target')
added_fields.add('path')
added_fields.add('media_kind')
added_fields.add('detail')
added_fields.add('annotations')
added_fields.add('full_metadata') # Don't display full metadata as column
added_fields.add("table")
added_fields.add("source")
added_fields.add("target")
added_fields.add("path")
added_fields.add("media_kind")
added_fields.add("detail")
added_fields.add("annotations")
added_fields.add("full_metadata") # Don't display full metadata as column
except Exception:
# Fall back to regular field handling if columns format is unexpected
pass
@@ -930,10 +993,10 @@ class ResultTable:
if column_count == 0:
# Explicitly set which columns to display in order
priority_groups = [
('title', ['title', 'name', 'filename']),
('store', ['store', 'table', 'source']),
('size', ['size', 'size_bytes']),
('ext', ['ext']),
("title", ["title", "name", "filename"]),
("store", ["store", "table", "source"]),
("size", ["size", "size_bytes"]),
("ext", ["ext"]),
]
# Add priority field groups first - use first match in each group
@@ -943,20 +1006,20 @@ class ResultTable:
for field in field_options:
if field in visible_data and field not in added_fields:
# Special handling for size fields - format with unit and decimals
if field in ['size', 'size_bytes']:
if field in ["size", "size_bytes"]:
value_str = _format_size(visible_data[field], integer_only=False)
else:
value_str = format_value(visible_data[field])
# Map field names to display column names
if field in ['store', 'table', 'source']:
if field in ["store", "table", "source"]:
col_name = "Store"
elif field in ['size', 'size_bytes']:
elif field in ["size", "size_bytes"]:
col_name = "Size"
elif field in ['title', 'name', 'filename']:
elif field in ["title", "name", "filename"]:
col_name = "Title"
else:
col_name = field.replace('_', ' ').title()
col_name = field.replace("_", " ").title()
row.add_column(col_name, value_str)
added_fields.add(field)
@@ -967,19 +1030,19 @@ class ResultTable:
# Don't add any remaining fields - only use priority_groups for dict results
# Check for selection args
if '_selection_args' in data:
row.selection_args = data['_selection_args']
if "_selection_args" in data:
row.selection_args = data["_selection_args"]
# Don't display it
added_fields.add('_selection_args')
added_fields.add("_selection_args")
def _add_generic_object(self, row: ResultRow, obj: Any) -> None:
"""Extract and add fields from generic objects."""
if hasattr(obj, '__dict__'):
if hasattr(obj, "__dict__"):
for key, value in obj.__dict__.items():
if key.startswith('_'): # Skip private attributes
if key.startswith("_"): # Skip private attributes
continue
row.add_column(key.replace('_', ' ').title(), str(value))
row.add_column(key.replace("_", " ").title(), str(value))
def to_rich(self):
"""Return a Rich renderable representing this table."""
@@ -1059,7 +1122,7 @@ class ResultTable:
data = {
"title": self.title,
"row_count": len(self.rows),
"rows": [row.to_list() for row in self.rows]
"rows": [row.to_list() for row in self.rows],
}
return json.dumps(data, indent=2)
@@ -1069,10 +1132,7 @@ class ResultTable:
Returns:
Dictionary representation
"""
return {
"title": self.title,
"rows": [row.to_list() for row in self.rows]
}
return {"title": self.title, "rows": [row.to_list() for row in self.rows]}
def __str__(self) -> str:
"""String representation.
@@ -1102,7 +1162,9 @@ class ResultTable:
"""Get row by index."""
return self.rows[index]
def select_interactive(self, prompt: str = "Select an item", accept_args: bool = False) -> Optional[List[int]] | dict:
def select_interactive(
self, prompt: str = "Select an item", accept_args: bool = False
) -> Optional[List[int]] | dict:
"""Display table and get interactive user selection (single or multiple).
Supports multiple input formats:
@@ -1139,11 +1201,15 @@ class ResultTable:
while True:
try:
if accept_args:
choice = Prompt.ask(f"{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit)").strip()
choice = Prompt.ask(
f"{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit)"
).strip()
else:
choice = Prompt.ask(f"{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit)").strip()
choice = Prompt.ask(
f"{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit)"
).strip()
if choice.lower() == 'q':
if choice.lower() == "q":
return None
if accept_args:
@@ -1151,18 +1217,42 @@ class ResultTable:
result = self._parse_selection_with_args(choice)
if result is not None:
return result
stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")))
stdout_console().print(
Panel(
Text(
"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus')."
)
)
)
else:
# Parse just the selection
selected_indices = self._parse_selection(choice)
if selected_indices is not None:
return selected_indices
stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")))
stdout_console().print(
Panel(
Text(
"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit."
)
)
)
except (ValueError, EOFError):
if accept_args:
stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")))
stdout_console().print(
Panel(
Text(
"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus')."
)
)
)
else:
stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")))
stdout_console().print(
Panel(
Text(
"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit."
)
)
)
def _parse_selection(self, selection_str: str) -> Optional[List[int]]:
"""Parse user selection string into list of 0-based indices.
@@ -1185,7 +1275,7 @@ class ResultTable:
indices = set()
# Split by comma for multiple selections
parts = selection_str.split(',')
parts = selection_str.split(",")
for part in parts:
part = part.strip()
@@ -1193,10 +1283,10 @@ class ResultTable:
continue
# Check if it's a range (contains dash)
if '-' in part:
if "-" in part:
# Handle ranges like "3-5"
try:
range_parts = part.split('-')
range_parts = part.split("-")
if len(range_parts) != 2:
return None
@@ -1283,10 +1373,7 @@ class ResultTable:
else:
i += 1
return {
"indices": selected_indices,
"args": cmdlet_args
}
return {"indices": selected_indices, "args": cmdlet_args}
def add_input_option(self, option: InputOption) -> "ResultTable":
"""Add an interactive input option to the table.
@@ -1330,9 +1417,11 @@ class ResultTable:
for i, choice in enumerate(option.choices, 1):
print(f" {i}. {choice}")
choice_input = input(f"Select {option_name} (1-{len(option.choices)}, or 'q' to cancel): ").strip()
choice_input = input(
f"Select {option_name} (1-{len(option.choices)}, or 'q' to cancel): "
).strip()
if choice_input.lower() == 'q':
if choice_input.lower() == "q":
return None
try:
@@ -1347,7 +1436,7 @@ class ResultTable:
elif option.type in ("string", "integer"):
value = input(f"{prompt_text} (or 'q' to cancel): ").strip()
if value.lower() == 'q':
if value.lower() == "q":
return None
# Validate if validator provided
@@ -1368,9 +1457,9 @@ class ResultTable:
# For flag options
elif option.type == "flag":
response = input(f"{prompt_text} (y/n): ").strip().lower()
if response == 'q':
if response == "q":
return None
return "true" if response in ('y', 'yes', 'true') else "false"
return "true" if response in ("y", "yes", "true") else "false"
except (ValueError, EOFError):
return None
@@ -1489,7 +1578,7 @@ class ResultTable:
tag=tag,
file_hash=file_hash or None,
file_size=file_size or None,
duration=duration or None
duration=duration or None,
)
def build_metadata_tree(self, tree_widget: "Tree") -> None:
+109 -58
View File
@@ -88,7 +88,16 @@ def run_platform_bootstrap(repo_root: Path) -> int:
if not exe:
print("PowerShell not found; cannot run bootstrap.ps1", file=sys.stderr)
return 1
cmd = [exe, "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-File", str(ps1), "-Quiet"]
cmd = [
exe,
"-NoProfile",
"-NonInteractive",
"-ExecutionPolicy",
"Bypass",
"-File",
str(ps1),
"-Quiet",
]
elif sh_script.exists():
shell = _find_shell()
if not shell:
@@ -189,15 +198,23 @@ def _install_deno(version: str | None = None) -> int:
def main() -> int:
parser = argparse.ArgumentParser(description="Bootstrap Medios-Macina: install deps and Playwright browsers")
parser.add_argument(
"--skip-deps", action="store_true", help="Skip installing Python dependencies from requirements.txt"
parser = argparse.ArgumentParser(
description="Bootstrap Medios-Macina: install deps and Playwright browsers"
)
parser.add_argument(
"--no-playwright", action="store_true", help="Skip running 'playwright install' (only install packages)"
"--skip-deps",
action="store_true",
help="Skip installing Python dependencies from requirements.txt",
)
parser.add_argument(
"--playwright-only", action="store_true", help="Only run 'playwright install' (skips dependency installation)"
"--no-playwright",
action="store_true",
help="Skip running 'playwright install' (only install packages)",
)
parser.add_argument(
"--playwright-only",
action="store_true",
help="Only run 'playwright install' (skips dependency installation)",
)
parser.add_argument(
"--browsers",
@@ -212,12 +229,23 @@ def main() -> int:
)
deno_group = parser.add_mutually_exclusive_group()
deno_group.add_argument(
"--install-deno", action="store_true", help="Install the Deno runtime (default behavior; kept for explicitness)"
"--install-deno",
action="store_true",
help="Install the Deno runtime (default behavior; kept for explicitness)",
)
deno_group.add_argument(
"--no-deno", action="store_true", help="Skip installing Deno runtime (opt out)"
)
deno_group.add_argument("--no-deno", action="store_true", help="Skip installing Deno runtime (opt out)")
parser.add_argument("--deno-version", type=str, default=None, help="Specific Deno version to install (e.g., v1.34.3)")
parser.add_argument(
"--upgrade-pip", action="store_true", help="Upgrade pip/setuptools/wheel before installing requirements"
"--deno-version",
type=str,
default=None,
help="Specific Deno version to install (e.g., v1.34.3)",
)
parser.add_argument(
"--upgrade-pip",
action="store_true",
help="Upgrade pip/setuptools/wheel before installing requirements",
)
args = parser.parse_args()
@@ -297,12 +325,26 @@ def main() -> int:
if args.upgrade_pip:
print("Upgrading pip, setuptools, and wheel in local venv...")
run([str(venv_python), "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"])
run(
[
str(venv_python),
"-m",
"pip",
"install",
"--upgrade",
"pip",
"setuptools",
"wheel",
]
)
if not args.skip_deps:
req_file = repo_root / "requirements.txt"
if not req_file.exists():
print(f"requirements.txt not found at {req_file}; skipping dependency installation.", file=sys.stderr)
print(
f"requirements.txt not found at {req_file}; skipping dependency installation.",
file=sys.stderr,
)
else:
print(f"Installing Python dependencies into local venv from {req_file}...")
run([str(venv_python), "-m", "pip", "install", "-r", str(req_file)])
@@ -360,7 +402,9 @@ def main() -> int:
site_dir = Path(sp)
break
if site_dir is None:
print("Could not determine venv site-packages directory; skipping .pth fallback")
print(
"Could not determine venv site-packages directory; skipping .pth fallback"
)
else:
pth_file = site_dir / "medeia_repo.pth"
if pth_file.exists():
@@ -378,7 +422,12 @@ def main() -> int:
# Re-check whether CLI can be imported now
rc2 = subprocess.run(
[str(venv_python), "-c", "import importlib; importlib.import_module('CLI')"], check=False
[
str(venv_python),
"-c",
"import importlib; importlib.import_module('CLI')",
],
check=False,
)
if rc2.returncode == 0:
print("Top-level 'CLI' import works after adding .pth")
@@ -452,8 +501,8 @@ python -m medeia_macina.cli_entry @args
"@echo off\r\n"
"set SCRIPT_DIR=%~dp0\r\n"
"set PATH=%SCRIPT_DIR%\\.venv\\Scripts;%PATH%\r\n"
"if exist \"%SCRIPT_DIR%\\.venv\\Scripts\\python.exe\" \"%SCRIPT_DIR%\\.venv\\Scripts\\python.exe\" -m medeia_macina.cli_entry %*\r\n"
"if exist \"%SCRIPT_DIR%\\CLI.py\" python \"%SCRIPT_DIR%\\CLI.py\" %*\r\n"
'if exist "%SCRIPT_DIR%\\.venv\\Scripts\\python.exe" "%SCRIPT_DIR%\\.venv\\Scripts\\python.exe" -m medeia_macina.cli_entry %*\r\n'
'if exist "%SCRIPT_DIR%\\CLI.py" python "%SCRIPT_DIR%\\CLI.py" %*\r\n'
"python -m medeia_macina.cli_entry %*\r\n"
)
try:
@@ -478,12 +527,12 @@ python -m medeia_macina.cli_entry @args
cmd_text = (
f"@echo off\r\n"
f"set REPO={repo}\r\n"
f"if exist \"%REPO%\\.venv\\Scripts\\mm.exe\" \"%REPO%\\.venv\\Scripts\\mm.exe\" %*\r\n"
f'if exist "%REPO%\\.venv\\Scripts\\mm.exe" "%REPO%\\.venv\\Scripts\\mm.exe" %*\r\n'
f"if defined MM_DEBUG (\r\n"
f" echo MM_DEBUG: REPO=%REPO%\r\n"
f" if exist \"%REPO%\\.venv\\Scripts\\python.exe\" \"%REPO%\\.venv\\Scripts\\python.exe\" -c \"import sys,importlib,importlib.util; print('sys.executable:', sys.executable); print('sys.path (first 8):', sys.path[:8]);\" \r\n"
f' if exist "%REPO%\\.venv\\Scripts\\python.exe" "%REPO%\\.venv\\Scripts\\python.exe" -c "import sys,importlib,importlib.util; print(\'sys.executable:\', sys.executable); print(\'sys.path (first 8):\', sys.path[:8]);" \r\n'
f")\r\n"
f"if exist \"%REPO%\\.venv\\Scripts\\python.exe\" \"%REPO%\\.venv\\Scripts\\python.exe\" -m medeia_macina.cli_entry %*\r\n"
f'if exist "%REPO%\\.venv\\Scripts\\python.exe" "%REPO%\\.venv\\Scripts\\python.exe" -m medeia_macina.cli_entry %*\r\n'
f"python -m medeia_macina.cli_entry %*\r\n"
)
if mm_cmd.exists():
@@ -495,14 +544,14 @@ python -m medeia_macina.cli_entry @args
mm_ps1 = user_bin / "mm.ps1"
ps1_text = (
"Param([Parameter(ValueFromRemainingArguments=$true)] $args)\n"
f"$repo = \"{repo}\"\n"
f'$repo = "{repo}"\n'
"$venv = Join-Path $repo '.venv'\n"
"$exe = Join-Path $venv 'Scripts\\mm.exe'\n"
"if (Test-Path $exe) { & $exe @args; exit $LASTEXITCODE }\n"
"$py = Join-Path $venv 'Scripts\\python.exe'\n"
"if (Test-Path $py) {\n"
" if ($env:MM_DEBUG) {\n"
" Write-Host \"MM_DEBUG: diagnostics\" -ForegroundColor Yellow\n"
' Write-Host "MM_DEBUG: diagnostics" -ForegroundColor Yellow\n'
" & $py -c \"import sys,importlib,importlib.util,traceback; print('sys.executable:', sys.executable); print('sys.path (first 8):', sys.path[:8]);\"\n"
" }\n"
" & $py -m medeia_macina.cli_entry @args; exit $LASTEXITCODE\n"
@@ -524,7 +573,9 @@ python -m medeia_macina.cli_entry @args
"$cur = [Environment]::GetEnvironmentVariable('PATH','User');"
"if ($cur -notlike \"*$bin*\") {[Environment]::SetEnvironmentVariable('PATH', ($bin + ';' + ($cur -ne $null ? $cur : '')), 'User')}"
).format(bin=str_bin.replace("\\", "\\\\"))
subprocess.run(["powershell", "-NoProfile", "-Command", ps_cmd], check=False)
subprocess.run(
["powershell", "-NoProfile", "-Command", ps_cmd], check=False
)
except Exception:
pass
@@ -539,64 +590,64 @@ python -m medeia_macina.cli_entry @args
sh_text = (
"#!/usr/bin/env bash\n"
"set -e\n"
f"REPO=\"{repo}\"\n"
f'REPO="{repo}"\n'
"# Prefer git top-level when available to avoid embedding a parent path.\n"
"if command -v git >/dev/null 2>&1; then\n"
" gitroot=$(git -C \"$REPO\" rev-parse --show-toplevel 2>/dev/null || true)\n"
" if [ -n \"$gitroot\" ]; then\n"
" REPO=\"$gitroot\"\n"
' gitroot=$(git -C "$REPO" rev-parse --show-toplevel 2>/dev/null || true)\n'
' if [ -n "$gitroot" ]; then\n'
' REPO="$gitroot"\n'
" fi\n"
"fi\n"
"# If git not available or didn't resolve, walk up from CWD to find a project root.\n"
"if [ ! -f \"$REPO/CLI.py\" ] && [ ! -f \"$REPO/pyproject.toml\" ]; then\n"
" CUR=\"$(pwd -P)\"\n"
" while [ \"$CUR\" != \"/\" ] && [ \"$CUR\" != \"\" ]; do\n"
" if [ -f \"$CUR/CLI.py\" ] || [ -f \"$CUR/pyproject.toml\" ]; then\n"
" REPO=\"$CUR\"\n"
'if [ ! -f "$REPO/CLI.py" ] && [ ! -f "$REPO/pyproject.toml" ]; then\n'
' CUR="$(pwd -P)"\n'
' while [ "$CUR" != "/" ] && [ "$CUR" != "" ]; do\n'
' if [ -f "$CUR/CLI.py" ] || [ -f "$CUR/pyproject.toml" ]; then\n'
' REPO="$CUR"\n'
" break\n"
" fi\n"
" CUR=\"$(dirname \"$CUR\")\"\n"
' CUR="$(dirname "$CUR")"\n'
" done\n"
"fi\n"
"VENV=\"$REPO/.venv\"\n"
'VENV="$REPO/.venv"\n'
"# Debug mode: set MM_DEBUG=1 to print repository, venv, and import diagnostics\n"
"if [ -n \"${MM_DEBUG:-}\" ]; then\n"
" echo \"MM_DEBUG: diagnostics\" >&2\n"
" echo \"Resolved REPO: $REPO\" >&2\n"
" echo \"Resolved VENV: $VENV\" >&2\n"
" echo \"VENV exists: $( [ -d \"$VENV\" ] && echo yes || echo no )\" >&2\n"
" echo \"Candidates:\" >&2\n"
" echo \" VENV/bin/mm: $( [ -x \"$VENV/bin/mm\" ] && echo yes || echo no )\" >&2\n"
" echo \" VENV/bin/python3: $( [ -x \"$VENV/bin/python3\" ] && echo yes || echo no )\" >&2\n"
" echo \" VENV/bin/python: $( [ -x \"$VENV/bin/python\" ] && echo yes || echo no )\" >&2\n"
" echo \" system python3: $(command -v python3 || echo none)\" >&2\n"
" echo \" system python: $(command -v python || echo none)\" >&2\n"
" for pycmd in \"$VENV/bin/python3\" \"$VENV/bin/python\" \"$(command -v python3 2>/dev/null)\" \"$(command -v python 2>/dev/null)\"; do\n"
" if [ -n \"$pycmd\" ] && [ -x \"$pycmd\" ]; then\n"
" echo \"---- Testing with: $pycmd ----\" >&2\n"
'if [ -n "${MM_DEBUG:-}" ]; then\n'
' echo "MM_DEBUG: diagnostics" >&2\n'
' echo "Resolved REPO: $REPO" >&2\n'
' echo "Resolved VENV: $VENV" >&2\n'
' echo "VENV exists: $( [ -d "$VENV" ] && echo yes || echo no )" >&2\n'
' echo "Candidates:" >&2\n'
' echo " VENV/bin/mm: $( [ -x "$VENV/bin/mm" ] && echo yes || echo no )" >&2\n'
' echo " VENV/bin/python3: $( [ -x "$VENV/bin/python3" ] && echo yes || echo no )" >&2\n'
' echo " VENV/bin/python: $( [ -x "$VENV/bin/python" ] && echo yes || echo no )" >&2\n'
' echo " system python3: $(command -v python3 || echo none)" >&2\n'
' echo " system python: $(command -v python || echo none)" >&2\n'
' for pycmd in "$VENV/bin/python3" "$VENV/bin/python" "$(command -v python3 2>/dev/null)" "$(command -v python 2>/dev/null)"; do\n'
' if [ -n "$pycmd" ] && [ -x "$pycmd" ]; then\n'
' echo "---- Testing with: $pycmd ----" >&2\n'
" $pycmd - <<'PY'\nimport sys, importlib, traceback, importlib.util\nprint('sys.executable:', sys.executable)\nprint('sys.path (first 8):', sys.path[:8])\nfor mod in ('CLI','medeia_macina','medeia_macina.cli_entry'):\n try:\n spec = importlib.util.find_spec(mod)\n print(mod, 'spec:', spec)\n if spec:\n m = importlib.import_module(mod)\n print(mod, 'loaded at', getattr(m, '__file__', None))\n except Exception:\n print(mod, 'import failed')\n traceback.print_exc()\nPY\n"
" fi\n"
" done\n"
" echo \"MM_DEBUG: end diagnostics\" >&2\n"
' echo "MM_DEBUG: end diagnostics" >&2\n'
"fi\n"
"# Packaged console script in the venv if available\n"
"if [ -x \"$VENV/bin/mm\" ]; then\n"
" exec \"$VENV/bin/mm\" \"$@\"\n"
'if [ -x "$VENV/bin/mm" ]; then\n'
' exec "$VENV/bin/mm" "$@"\n'
"fi\n"
"# Prefer venv's python3, then venv's python\n"
"if [ -x \"$VENV/bin/python3\" ]; then\n"
" exec \"$VENV/bin/python3\" -m medeia_macina.cli_entry \"$@\"\n"
'if [ -x "$VENV/bin/python3" ]; then\n'
' exec "$VENV/bin/python3" -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"if [ -x \"$VENV/bin/python\" ]; then\n"
" exec \"$VENV/bin/python\" -m medeia_macina.cli_entry \"$@\"\n"
'if [ -x "$VENV/bin/python" ]; then\n'
' exec "$VENV/bin/python" -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"# Fallback to system python3, then system python (only if it's Python 3)\n"
"if command -v python3 >/dev/null 2>&1; then\n"
" exec python3 -m medeia_macina.cli_entry \"$@\"\n"
' exec python3 -m medeia_macina.cli_entry "$@"\n'
"fi\n"
"if command -v python >/dev/null 2>&1; then\n"
" if python -c 'import sys; sys.exit(0 if sys.version_info[0] >= 3 else 1)'; then\n"
" exec python -m medeia_macina.cli_entry \"$@\"\n"
' exec python -m medeia_macina.cli_entry "$@"\n'
" fi\n"
"fi\n"
"echo 'Error: no suitable Python 3 interpreter found. Please install Python 3 or use the venv.' >&2\n"
@@ -614,8 +665,8 @@ python -m medeia_macina.cli_entry @args
profile = home / ".profile"
snippet = (
"# Added by Medeia-Macina setup: ensure user local bin is on PATH\n"
"if [ -d \"$HOME/.local/bin\" ] && [[ \":$PATH:\" != *\":$HOME/.local/bin:\"* ]]; then\n"
" PATH=\"$HOME/.local/bin:$PATH\"\n"
'if [ -d "$HOME/.local/bin" ] && [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then\n'
' PATH="$HOME/.local/bin:$PATH"\n'
"fi\n"
)
try:
+879 -49
View File
File diff suppressed because it is too large. [Load Diff]

Some files were not shown because too many files have changed in this diff. [Show More]