Files
Medios-Macina/cmdlet/get_file.py

445 lines
15 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
2025-12-11 12:47:30 -08:00
from typing import Any, Dict, Sequence
2025-11-25 20:09:33 -08:00
from pathlib import Path
2025-12-16 01:45:01 -08:00
import os
2025-11-25 20:09:33 -08:00
import sys
2025-12-11 12:47:30 -08:00
import shutil
2025-12-16 01:45:01 -08:00
import subprocess
2025-12-20 23:57:44 -08:00
import tempfile
import threading
import time
import http.server
from urllib.parse import quote
2025-12-16 01:45:01 -08:00
import webbrowser
2025-12-20 23:57:44 -08:00
from urllib.parse import urljoin
from urllib.request import pathname2url
2025-11-25 20:09:33 -08:00
import pipeline as ctx
2025-12-16 23:23:43 -08:00
from . import _shared as sh
2025-12-11 19:04:02 -08:00
from SYS.logger import log, debug
from Store import Store
from SYS.config import resolve_output_dir
2025-12-11 12:47:30 -08:00
2025-12-16 23:23:43 -08:00
class Get_File(sh.Cmdlet):
    """Export files to local path via hash+store.

    The cmdlet resolves a file by hash in a named storage backend and then:

    * opens the returned URL in the web browser when the backend hands back an
      ``http(s)://`` string,
    * opens the file in a viewer when the backend class is named ``Folder``
      and no ``-path`` was given,
    * otherwise copies the file into the output directory.
    """

    # Suffixes that are routed through the browser instead of the OS default app.
    _IMAGE_SUFFIXES = frozenset(
        {
            ".png",
            ".jpg",
            ".jpeg",
            ".gif",
            ".webp",
            ".bmp",
            ".tif",
            ".tiff",
            ".svg",
        }
    )

    # How long the single-file localhost server is kept alive.
    _SERVER_LIFETIME_SECONDS = 10 * 60

    def __init__(self) -> None:
        """Initialize get-file cmdlet."""
        super().__init__(
            name="get-file",
            summary="Export file to local path",
            usage="@1 | get-file -path C:\\Downloads",
            arg=[
                sh.SharedArgs.QUERY,
                sh.SharedArgs.STORE,
                sh.SharedArgs.PATH,
                sh.CmdletArg(
                    "name",
                    description="Output filename (default: from metadata title)",
                ),
            ],
            detail=[
                "- Exports file from storage backend to local path",
                '- Uses selected item\'s hash, or -query "hash:<sha256>"',
                "- Preserves file extension and metadata",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Export file via hash+store backend.

        Args:
            result: Piped pipeline item; its ``hash`` / ``store`` fields are
                used when the corresponding arguments are absent.
            args: Raw cmdlet arguments (``-query``, ``-store``, ``-path``,
                ``-name``).
            config: Configuration passed to ``Store`` and
                ``resolve_output_dir``.

        Returns:
            ``0`` on success, ``1`` when validation or retrieval fails.
        """
        debug(f"[get-file] run() called with result type: {type(result)}")
        parsed = sh.parse_cmdlet_args(args, self)
        debug(f"[get-file] parsed args: {parsed}")

        query_hash = sh.parse_single_hash_query(parsed.get("query"))
        if parsed.get("query") and not query_hash:
            log("Error: -query must be of the form hash:<sha256>")
            return 1

        # Explicit arguments win over fields of the piped item.
        file_hash = query_hash or sh.get_field(result, "hash")
        store_name = parsed.get("store") or sh.get_field(result, "store")
        output_path = parsed.get("path")
        output_name = parsed.get("name")

        debug(f"[get-file] file_hash={file_hash} store_name={store_name}")

        if not file_hash:
            log(
                'Error: No file hash provided (pipe an item or use -query "hash:<sha256>")'
            )
            return 1

        if not store_name:
            log("Error: No store name provided")
            return 1

        file_hash = sh.normalize_hash(file_hash)
        if not file_hash:
            log("Error: Invalid hash format")
            return 1

        debug(f"[get-file] Getting storage backend: {store_name}")
        store = Store(config)
        backend = store[store_name]
        debug(f"[get-file] Backend retrieved: {type(backend).__name__}")

        # Metadata supplies the title and extension used for naming.
        debug("[get-file] Getting metadata for hash...")
        metadata = backend.get_metadata(file_hash)
        if not metadata:
            log(f"Error: File metadata not found for hash {file_hash}")
            return 1
        debug(
            f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}"
        )

        debug(f"[get-file] Calling backend.get_file({file_hash})")
        # May be a Path or a URL string depending on the backend.
        source_path = backend.get_file(file_hash)
        debug(f"[get-file] backend.get_file returned: {source_path}")

        if isinstance(source_path, str) and source_path.startswith(
            ("http://", "https://")
        ):
            # Hydrus backend returns a URL; open it only for this explicit user action.
            try:
                webbrowser.open(source_path)
            except Exception as exc:
                log(f"Error opening browser: {exc}", file=sys.stderr)
            else:
                debug(f"Opened in browser: {source_path}", file=sys.stderr)

            ctx.emit(
                {
                    "hash": file_hash,
                    "store": store_name,
                    "url": source_path,
                    "title": self._resolve_display_title(result, metadata) or "Opened",
                }
            )
            return 0

        # Otherwise treat as file path (local/folder backends).
        if isinstance(source_path, str):
            source_path = Path(source_path)

        if not source_path or not source_path.exists():
            log(f"Error: Backend could not retrieve file for hash {file_hash}")
            return 1

        # Folder store UX: without -path, just open the file in the default app.
        # Only export/copy when -path is explicitly provided.
        is_folder_backend = type(backend).__name__.lower() == "folder"
        if is_folder_backend and not output_path:
            display_title = (
                self._resolve_display_title(result, metadata)
                or source_path.stem
                or "Opened"
            )
            ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".")
            self._open_file_default(source_path)
            log(f"Opened: {source_path}", file=sys.stderr)
            ctx.emit(
                {
                    "hash": file_hash,
                    "store": store_name,
                    "path": str(source_path),
                    "title": str(display_title),
                    "ext": str(ext_for_emit or ""),
                }
            )
            debug("[get-file] Completed successfully")
            return 0

        # Otherwise: export/copy to output_dir.
        if output_path:
            output_dir = Path(output_path).expanduser()
        else:
            output_dir = resolve_output_dir(config)

        debug(f"[get-file] Output dir: {output_dir}")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Determine output filename (only when exporting).
        if output_name:
            filename = str(output_name)
        else:
            title = (
                (metadata.get("title") if isinstance(metadata, dict) else None)
                or self._resolve_display_title(result, metadata)
                or "export"
            )
            filename = self._sanitize_filename(title)

        # Add the metadata extension unless the name already carries it.
        filename = self._append_extension(filename, metadata.get("ext"))
        dest_path = self._unique_path(output_dir / filename)

        debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
        shutil.copy2(source_path, dest_path)

        log(f"Exported: {dest_path}", file=sys.stderr)

        ctx.emit(
            {
                "hash": file_hash,
                "store": store_name,
                "path": str(dest_path),
                "title": filename,
            }
        )
        debug("[get-file] Completed successfully")
        return 0

    @staticmethod
    def _resolve_display_title(result: Any, metadata: Any) -> str:
        """Return the first non-blank title/name/filename, or ``""``.

        Fields of the piped ``result`` are consulted before ``metadata``;
        ``metadata`` is only read when it is a ``dict``.
        """
        keys = ("title", "name", "filename")
        candidates = [sh.get_field(result, key) for key in keys]
        if isinstance(metadata, dict):
            candidates.extend(metadata.get(key) for key in keys)
        for candidate in candidates:
            if candidate is None:
                continue
            text = str(candidate).strip()
            if text:
                return text
        return ""

    @staticmethod
    def _append_extension(filename: str, ext: Any) -> str:
        """Return ``filename`` with ``ext`` appended unless already present.

        ``ext`` may come with or without a leading dot. The comparison is done
        on the dotted form and ignores case, so ``"clip.MP4"`` + ``"mp4"`` is
        left alone while ``"my mp4"`` + ``"mp4"`` becomes ``"my mp4.mp4"``.
        """
        ext_text = str(ext or "").strip()
        if not ext_text.lstrip("."):
            # Empty or dots only: nothing meaningful to append.
            return filename
        if not ext_text.startswith("."):
            ext_text = "." + ext_text
        if filename.lower().endswith(ext_text.lower()):
            return filename
        return filename + ext_text

    def _open_file_default(self, path: Path) -> None:
        """Open a local file in the OS default application.

        Any exception raised while opening is logged, not propagated.
        """
        try:
            suffix = str(path.suffix or "").lower()
            on_windows = sys.platform.startswith("win")

            # On Windows, file associations for common media types can point at
            # editors (Paint/VS Code). Prefer opening a localhost URL.
            if on_windows and self._open_local_file_in_browser_via_http(path):
                return

            if suffix in self._IMAGE_SUFFIXES:
                # Use default web browser for images. The localhost route was
                # already attempted above on Windows, so do not repeat it.
                if self._open_image_in_default_browser(path, try_http=not on_windows):
                    return

            if on_windows:
                os.startfile(str(path))  # type: ignore[attr-defined]
                return

            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.Popen(
                [opener, str(path)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as exc:
            log(f"Error opening file: {exc}", file=sys.stderr)

    def _open_local_file_in_browser_via_http(self, file_path: Path) -> bool:
        """Serve a single local file via localhost HTTP and open in browser.

        This avoids Windows file-association issues (e.g., PNG -> Paint,
        HTML -> VS Code). The server is bound to 127.0.0.1 on an ephemeral port
        and is shut down after a timeout, or immediately when the browser
        could not be launched.

        Returns:
            ``True`` when ``webbrowser.open`` reported success, else ``False``.
        """
        try:
            resolved = file_path.resolve()
        except Exception:
            return False
        directory = resolved.parent
        filename = resolved.name
        # Accept both the raw and the percent-encoded spelling of the one file.
        allowed_paths = {"/" + filename, "/" + quote(filename)}

        class OneFileHandler(http.server.SimpleHTTPRequestHandler):
            """Request handler that exposes exactly one file of ``directory``."""

            def __init__(self, *handler_args, **handler_kwargs):
                super().__init__(
                    *handler_args,
                    directory=str(directory),
                    **handler_kwargs,
                )

            def log_message(self, format: str, *args) -> None:  # noqa: A003
                # Keep normal output clean.
                return

            def _route(self) -> bool:
                """Map ``/`` to the file; send 404 for anything else."""
                if self.path in {"/", ""}:
                    self.path = "/" + filename
                    return True
                if self.path in allowed_paths:
                    return True
                self.send_error(404)
                return False

            def do_GET(self) -> None:  # noqa: N802
                if self._route():
                    super().do_GET()

            def do_HEAD(self) -> None:  # noqa: N802
                if self._route():
                    super().do_HEAD()

        try:
            httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 0), OneFileHandler)
        except Exception:
            return False

        def stop_server() -> None:
            # Best-effort teardown; each step is attempted even if the other fails.
            for action in (httpd.shutdown, httpd.server_close):
                try:
                    action()
                except Exception as exc:
                    debug(f"[get-file] Error stopping localhost server: {exc}")

        port = httpd.server_address[1]
        url = f"http://127.0.0.1:{port}/{quote(filename)}"

        # Run server in the background.
        threading.Thread(
            target=httpd.serve_forever,
            kwargs={"poll_interval": 0.2},
            daemon=True,
        ).start()

        # Auto-shutdown after a timeout to avoid lingering servers.
        shutdown_timer = threading.Timer(self._SERVER_LIFETIME_SECONDS, stop_server)
        shutdown_timer.daemon = True
        shutdown_timer.start()

        try:
            debug(f"[get-file] Opening via localhost: {url}")
            opened = bool(webbrowser.open(url))
        except Exception:
            opened = False

        if not opened:
            # Nobody will request the file, so release the port right away.
            shutdown_timer.cancel()
            stop_server()
        return opened

    def _open_image_in_default_browser(
        self, image_path: Path, *, try_http: bool = True
    ) -> bool:
        """Open an image file in the user's default web browser.

        We intentionally avoid opening the image path directly on Windows
        because file associations may point to editors/viewers (e.g., Paint).
        The localhost server is preferred; if that fails we generate a tiny
        HTML wrapper in the temp directory and open that (HTML is typically
        associated with the default browser).

        Args:
            image_path: Image to show.
            try_http: Set to ``False`` when the caller has already attempted
                the localhost route for this file.

        Returns:
            ``True`` when a browser open call reported success.
        """
        from html import escape  # local import: only needed for the wrapper

        # Prefer localhost server when possible (reliable on Windows).
        if try_http and self._open_local_file_in_browser_via_http(image_path):
            return True

        try:
            resolved = image_path.resolve()
            image_url = urljoin("file:", pathname2url(str(resolved)))
        except Exception:
            return False

        # Create a stable wrapper filename to reduce temp-file spam.
        wrapper_path = (
            Path(tempfile.gettempdir()) / f"medeia-open-image-{resolved.stem}.html"
        )
        # File names may contain <, >, & or quotes; escape before embedding.
        safe_name = escape(resolved.name, quote=True)
        safe_url = escape(image_url, quote=True)
        markup = "\n".join(
            [
                "<!doctype html>",
                '<meta charset="utf-8">',
                f"<title>{safe_name}</title>",
                "<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
                f'<img src="{safe_url}" alt="{safe_name}">',
            ]
        )
        try:
            wrapper_path.write_text(markup, encoding="utf-8")
        except Exception:
            return False

        try:
            return bool(webbrowser.open(wrapper_path.as_uri()))
        except Exception:
            return False

    def _sanitize_filename(self, name: str) -> str:
        """Sanitize filename by removing invalid characters.

        Alphanumerics and ``- _ . space`` are kept, every other character
        becomes a space, and whitespace runs are collapsed. Names that end up
        empty or made only of dots/spaces (``"."``, ``".."``) yield
        ``"export"``.
        """
        keep = {"-", "_", " ", "."}
        cleaned = "".join(
            ch if (ch.isalnum() or ch in keep) else " " for ch in str(name)
        )
        # Collapse multiple spaces.
        sanitized = " ".join(cleaned.split())
        if not sanitized.strip(". "):
            return "export"
        return sanitized

    def _unique_path(self, path: Path) -> Path:
        """Generate unique path by adding (1), (2), etc. if file exists."""
        if not path.exists():
            return path

        stem, suffix, parent = path.stem, path.suffix, path.parent
        counter = 1
        while True:
            candidate = parent / f"{stem} ({counter}){suffix}"
            if not candidate.exists():
                return candidate
            counter += 1
2025-12-01 01:10:16 -08:00
2025-12-11 12:47:30 -08:00
# Instantiate the cmdlet at import time; Get_File.__init__ calls self.register().
# NOTE(review): the variable is named Add_File_Instance although it holds a
# Get_File instance — presumably a copy-paste leftover; confirm nothing imports
# this name before renaming it.
Add_File_Instance = Get_File()