From 9873280f0e5e2f0c83f7b75d09615b26fe0bb857 Mon Sep 17 00:00:00 2001 From: nose Date: Tue, 16 Dec 2025 01:45:01 -0800 Subject: [PATCH] d --- API/HydrusNetwork.py | 74 ++-- API/alldebrid.py | 252 ++++++++--- API/folder.py | 280 +++++++++---- CLI.py | 537 +++++++++++++++++++++--- MPV/mpv_ipc.py | 47 ++- Provider/alldebrid.py | 296 +++++++++++++ Provider/libgen.py | 518 +++++++++++++++++++---- Provider/matrix.py | 108 +++-- Provider/soulseek.py | 36 +- ProviderCore/registry.py | 2 + Store/HydrusNetwork.py | 84 ++-- TUI/modalscreen/download.py | 14 +- cmdlet/_shared.py | 45 +- cmdlet/add_file.py | 457 ++++++++++++++++++-- cmdlet/add_relationship.py | 757 +++++++++++++++++++++++++++------- cmdlet/add_tag.py | 14 +- cmdlet/check_file_status.py | 50 ++- cmdlet/delete_file.py | 57 ++- cmdlet/delete_relationship.py | 350 ++++++++++++---- cmdlet/download_file.py | 32 ++ cmdlet/download_media.py | 379 ++++++++++++++--- cmdlet/download_torrent.py | 95 ++++- cmdlet/get_file.py | 133 +++--- cmdlet/get_relationship.py | 514 +++++++++++------------ cmdlet/merge_file.py | 10 +- cmdlet/screen_shot.py | 60 ++- cmdlet/search_provider.py | 140 +++++-- cmdlet/search_store.py | 49 +-- cmdnat/matrix.py | 437 +++++++++++++++++++- cmdnat/pipe.py | 128 ++++-- config.py | 20 + metadata.py | 2 +- models.py | 96 +++-- pyproject.toml | 2 +- requirements.txt | 2 +- result_table.py | 59 ++- 36 files changed, 4911 insertions(+), 1225 deletions(-) create mode 100644 Provider/alldebrid.py diff --git a/API/HydrusNetwork.py b/API/HydrusNetwork.py index 630fa42..494d951 100644 --- a/API/HydrusNetwork.py +++ b/API/HydrusNetwork.py @@ -11,7 +11,7 @@ import subprocess import sys import time -from SYS.logger import log, debug +from SYS.logger import log from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS import tempfile import logging @@ -360,24 +360,24 @@ class HydrusNetwork: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = {"hash": hashes[0], "url_to_add": url} - return self._post("/add_url/associate_url", data=body) + return self._post("/add_urls/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = {"hash": file_hash, "url_to_add": url} - results[file_hash] = self._post("/add_url/associate_url", data=body) + results[file_hash] = self._post("/add_urls/associate_url", data=body) return {"batched": results} def delete_url(self, file_hashes: Union[str, Iterable[str]], url: str) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = {"hash": hashes[0], "url_to_delete": url} - return self._post("/add_url/associate_url", data=body) + return self._post("/add_urls/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = {"hash": file_hash, "url_to_delete": url} - results[file_hash] = self._post("/add_url/associate_url", data=body) + results[file_hash] = self._post("/add_urls/associate_url", data=body) return {"batched": results} def set_notes( @@ -436,35 +436,61 @@ class HydrusNetwork: def set_relationship(self, hash_a: str, hash_b: str, relationship: Union[str, int], do_default_content_merge: bool = False) -> dict[str, Any]: """Set a relationship between two files in Hydrus. - + + This wraps Hydrus Client API: POST /manage_file_relationships/set_file_relationships. 
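+        For example, relationship="duplicate" (or the integer 2) marks the pair as same-quality duplicates; see the enum below.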
+ + Hydrus relationship enum (per Hydrus developer API docs): + - 0: set as potential duplicates + - 1: set as false positives + - 2: set as same quality (duplicates) + - 3: set as alternates + - 4: set A as better (duplicates) + Args: - hash_a: First file hash - hash_b: Second file hash - relationship: Relationship type - can be string ("king", "alt", "related", etc) - or integer (0-4): - - 0 = duplicates - - 1 = alternate - - 2 = not_related - - 3 = related - - 4 = king - do_default_content_merge: Whether to perform default content merge - + hash_a: First file SHA256 hex + hash_b: Second file SHA256 hex + relationship: Relationship type as string or integer enum (0-4) + do_default_content_merge: Whether to perform default duplicate content merge + Returns: Response from Hydrus API """ # Convert string relationship types to integers if isinstance(relationship, str): rel_map = { - "duplicates": 0, - "duplicate": 0, - "alt": 1, - "alternate": 1, - "not_related": 2, - "not related": 2, + # Potential duplicates + "potential": 0, + "potentials": 0, + "potential duplicate": 0, + "potential duplicates": 0, + # False positives + "false positive": 1, + "false_positive": 1, + "false positives": 1, + "false_positives": 1, + "not related": 1, + "not_related": 1, + # Duplicates (same quality) + "duplicate": 2, + "duplicates": 2, + "same quality": 2, + "same_quality": 2, + "equal": 2, + # Alternates + "alt": 3, + "alternate": 3, + "alternates": 3, + "alternative": 3, "related": 3, + # Better/worse (duplicates) + "better": 4, + "a better": 4, + "a_better": 4, + # Back-compat: some older call sites used 'king' for primary. + # Hydrus does not accept 'king' as a relationship; this maps to 'A is better'. "king": 4, } - relationship = rel_map.get(relationship.lower(), 3) # Default to "related" (3) + relationship = rel_map.get(relationship.lower().strip(), 3) # Default to alternates body = { "relationships": [ diff --git a/API/alldebrid.py b/API/alldebrid.py index 0f4e202..434332c 100644 --- a/API/alldebrid.py +++ b/API/alldebrid.py @@ -13,7 +13,7 @@ from SYS.logger import log, debug import time import logging from typing import Any, Dict, Optional, Set, List, Sequence, Tuple -from urllib.parse import urlencode, urlparse +from urllib.parse import urlparse from .HTTP import HTTPClient logger = logging.getLogger(__name__) @@ -51,11 +51,34 @@ def _ping_alldebrid(base_url: str) -> Tuple[bool, Optional[str]]: class AllDebridClient: """Client for AllDebrid API.""" - # Try both v4 and v3 APIs - BASE_url = [ - "https://api.alldebrid.com/v4", - "https://api.alldebrid.com/v3", - ] + # Default to v4 for most endpoints. + # Some endpoints have a newer /v4.1/ variant (e.g., magnet/status, user/hosts, pin/get). + BASE_URL = "https://api.alldebrid.com/v4" + BASE_URL_V41 = "https://api.alldebrid.com/v4.1" + + # Endpoints documented as POST in v4 API. + _POST_ENDPOINTS: Set[str] = { + "pin/check", + "user/verif", + "user/verif/resend", + "user/notification/clear", + "link/infos", + "link/redirector", + "link/unlock", + "link/streaming", + "link/delayed", + "magnet/upload", + "magnet/upload/file", + "magnet/status", # v4.1 variant exists; method stays POST + "magnet/files", + "magnet/delete", + "magnet/restart", + "user/links/save", + "user/links/delete", + "user/history/delete", + "voucher/get", + "voucher/generate", + } def __init__(self, api_key: str): """Initialize AllDebrid client with API key. 
@@ -66,7 +89,7 @@ class AllDebridClient: self.api_key = api_key.strip() if not self.api_key: raise AllDebridError("AllDebrid API key is empty") - self.base_url = self.BASE_url[0] # Start with v4 + self.base_url = self.BASE_URL # Start with v4 # Init-time availability validation (cached per process) fingerprint = f"base:{self.base_url}" # /ping does not require the api key @@ -80,7 +103,13 @@ class AllDebridClient: if not ok: raise AllDebridError(reason or "AllDebrid unavailable") - def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + def _request( + self, + endpoint: str, + params: Optional[Dict[str, Any]] = None, + *, + method: Optional[str] = None, + ) -> Dict[str, Any]: """Make a request to AllDebrid API. Args: @@ -95,25 +124,38 @@ class AllDebridClient: """ if params is None: params = {} - - # Add API key to params - params['apikey'] = self.api_key - + + # Determine HTTP method (v4 docs default to POST for most write/unlock endpoints). + if method is None: + method = "POST" if endpoint in self._POST_ENDPOINTS else "GET" + method = str(method).upper().strip() or "GET" + + # Auth header is the preferred mechanism per v4.1 docs. + # Keep apikey in params too for backward compatibility. + request_params: Dict[str, Any] = dict(params) + request_params["apikey"] = self.api_key + url = f"{self.base_url}/{endpoint}" - query_string = urlencode(params) - full_url = f"{url}?{query_string}" - - logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...") + + # Avoid logging full URLs with query params (can leak apikey). + logger.debug(f"[AllDebrid] {method} {endpoint} @ {self.base_url}") try: - # Pass timeout to HTTPClient init, not to get() - with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client: + headers = { + "User-Agent": "downlow/1.0", + "Authorization": f"Bearer {self.api_key}", + } + # Pass timeout to HTTPClient init. + with HTTPClient(timeout=30.0, headers=headers) as client: try: - response = client.get(full_url) + if method == "POST": + response = client.post(url, data=request_params) + else: + response = client.get(url, params=request_params) response.raise_for_status() except Exception as req_err: # Log detailed error info - logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True) + logger.error(f"[AllDebrid] Request error to {endpoint}: {req_err}", exc_info=True) if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore try: error_body = req_err.response.content.decode('utf-8') # type: ignore @@ -190,13 +232,26 @@ class AllDebridClient: Raises: AllDebridError: If request fails """ - try: - response = self._request('host', {'name': hostname}) - - if response.get('status') == 'success': - return response.get('data', {}) - + # The v4 API does not expose a `/host` endpoint. Use `/hosts/domains` and + # check membership. 
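+        # e.g. a supported hostname yields {"supported": True, "category": "hosts", "domain": "example.com"},
+        # an unsupported one {"supported": False, "domain": "example.com"} (domain shown is illustrative).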
+ if not hostname: return {} + + try: + host = str(hostname).strip().lower() + if host.startswith("www."): + host = host[4:] + + domains = self.get_supported_hosters() + if not domains: + return {} + + for category in ("hosts", "streams", "redirectors"): + values = domains.get(category) + if isinstance(values, list) and any(str(d).lower() == host for d in values): + return {"supported": True, "category": category, "domain": host} + + return {"supported": False, "domain": host} except AllDebridError: raise except Exception as exc: @@ -212,7 +267,8 @@ class AllDebridClient: AllDebridError: If request fails """ try: - response = self._request('user/profile') + # v4 endpoint is `/user` + response = self._request('user') if response.get('status') == 'success': return response.get('data', {}) @@ -227,7 +283,8 @@ class AllDebridClient: """Get list of all supported hosters from AllDebrid API. Returns: - Dict mapping domain to host info (status, name, etc) + Dict with keys `hosts`, `streams`, `redirectors` each containing an array + of domains. Raises: AllDebridError: If request fails @@ -237,7 +294,6 @@ class AllDebridClient: if response.get('status') == 'success': data = response.get('data', {}) - # The API returns hosts keyed by domain return data if isinstance(data, dict) else {} return {} @@ -334,7 +390,7 @@ class AllDebridClient: # Use v4.1 endpoint for better response format # Temporarily override base_url for this request old_base = self.base_url - self.base_url = "https://api.alldebrid.com/v4.1" + self.base_url = self.BASE_URL_V41 try: response = self._request('magnet/status', {'id': str(magnet_id)}) @@ -358,8 +414,48 @@ class AllDebridClient: raise except Exception as exc: raise AllDebridError(f"Failed to get magnet status: {exc}") + + def magnet_list(self) -> List[Dict[str, Any]]: + """List magnets stored in the AllDebrid account. + + The AllDebrid API returns an array of magnets when calling the status + endpoint without an id. + + Returns: + List of magnet objects. + """ + try: + # Use v4.1 endpoint for better response format + old_base = self.base_url + self.base_url = self.BASE_URL_V41 + try: + response = self._request('magnet/status') + finally: + self.base_url = old_base + + if response.get('status') != 'success': + return [] + + data = response.get('data', {}) + magnets = data.get('magnets', []) + + if isinstance(magnets, list): + return [m for m in magnets if isinstance(m, dict)] + + # Some API variants may return a dict. + if isinstance(magnets, dict): + # If it's a single magnet dict, wrap it; if it's an id->magnet mapping, return values. + if 'id' in magnets: + return [magnets] + return [m for m in magnets.values() if isinstance(m, dict)] + + return [] + except AllDebridError: + raise + except Exception as exc: + raise AllDebridError(f"Failed to list magnets: {exc}") - def magnet_status_live(self, magnet_id: int, session: int = None, counter: int = 0) -> Dict[str, Any]: + def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]: """Get live status of a magnet using delta sync mode. 
The live mode endpoint provides real-time progress by only sending @@ -388,21 +484,32 @@ class AllDebridClient: raise AllDebridError(f"Invalid magnet ID: {magnet_id}") try: - # For single magnet queries, just use regular endpoint with ID - # The "live mode" with session/counter is for multi-magnet dashboards - # where bandwidth savings from diffs matter - response = self._request('magnet/status', {'id': magnet_id}) - + # v4.1 is the up-to-date endpoint for magnet/status. + old_base = self.base_url + self.base_url = self.BASE_URL_V41 + try: + payload: Dict[str, Any] = {"id": str(magnet_id)} + if session is not None: + payload["session"] = str(int(session)) + payload["counter"] = str(int(counter)) + response = self._request('magnet/status', payload) + finally: + self.base_url = old_base + if response.get('status') == 'success': data = response.get('data', {}) magnets = data.get('magnets', []) - - # Handle list response + + # For specific magnet id, return the first match from the array. if isinstance(magnets, list) and len(magnets) > 0: return magnets[0] - + + # Some API variants may return a dict. + if isinstance(magnets, dict) and magnets: + return magnets + raise AllDebridError(f"No magnet found with ID {magnet_id}") - + raise AllDebridError(f"API error: {response.get('error', 'Unknown')}") except AllDebridError: raise @@ -784,28 +891,65 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) Returns: 0 on success, 1 on failure """ - try: - from .link_utils import ( - extract_link, - get_api_key, - add_direct_link_to_result, - ) - except ImportError as e: - log(f"Required modules unavailable: {e}", file=sys.stderr) - return 1 - + def _extract_link_from_args_or_result(result_obj: Any, argv: Sequence[str]) -> Optional[str]: + # Prefer an explicit URL in args. + for a in argv or []: + if isinstance(a, str) and a.startswith(("http://", "https://")): + return a.strip() + + # Fall back to common pipeline fields. + if isinstance(result_obj, dict): + for key in ("url", "source_url", "path", "target"): + v = result_obj.get(key) + if isinstance(v, str) and v.startswith(("http://", "https://")): + return v.strip() + return None + + def _get_alldebrid_api_key_from_config(cfg: Dict[str, Any]) -> Optional[str]: + # Current config format + try: + provider_cfg = cfg.get("provider") if isinstance(cfg, dict) else None + ad_cfg = provider_cfg.get("alldebrid") if isinstance(provider_cfg, dict) else None + api_key = ad_cfg.get("api_key") if isinstance(ad_cfg, dict) else None + if isinstance(api_key, str) and api_key.strip(): + return api_key.strip() + except Exception: + pass + + # Legacy config format fallback (best-effort) + try: + debrid_cfg = cfg.get("Debrid") if isinstance(cfg, dict) else None + api_key = None + if isinstance(debrid_cfg, dict): + api_key = debrid_cfg.get("All-debrid") or debrid_cfg.get("AllDebrid") + if isinstance(api_key, str) and api_key.strip(): + return api_key.strip() + except Exception: + pass + + return None + + def _add_direct_link_to_result(result_obj: Any, direct_link: str, original_link: str) -> None: + if not isinstance(direct_link, str) or not direct_link.strip(): + return + if isinstance(result_obj, dict): + # Keep original and promote unlocked link to the fields commonly used downstream. 
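+            # e.g. {"url": "https://host/file"} becomes
+            # {"source_url": "https://host/file", "url": <unlocked>, "path": <unlocked>} (values illustrative).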
+ result_obj.setdefault("source_url", original_link) + result_obj["url"] = direct_link + result_obj["path"] = direct_link + # Get link from args or result - link = extract_link(result, args) + link = _extract_link_from_args_or_result(result, args) if not link: log("No valid URL provided", file=sys.stderr) return 1 # Get AllDebrid API key from config - api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid") + api_key = _get_alldebrid_api_key_from_config(config) if not api_key: - log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr) + log("AllDebrid API key not configured (provider.alldebrid.api_key)", file=sys.stderr) return 1 # Try to unlock the link @@ -816,7 +960,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) debug(f"✓ Direct link: {direct_link}") # Update result with direct link - add_direct_link_to_result(result, direct_link, link) + _add_direct_link_to_result(result, direct_link, link) # Return the updated result via pipeline context # Note: The cmdlet wrapper will handle emitting to pipeline diff --git a/API/folder.py b/API/folder.py index 27d65c5..d39024e 100644 --- a/API/folder.py +++ b/API/folder.py @@ -609,12 +609,153 @@ class API_folder_store: try: metadata[field] = json.loads(metadata[field]) except (json.JSONDecodeError, TypeError): - metadata[field] = [] if field == 'url' else [] + metadata[field] = [] if field == 'url' else {} + + # Ensure relationships is always a dict + if metadata.get('relationships') is None: + metadata['relationships'] = {} + if not isinstance(metadata.get('relationships'), dict): + metadata['relationships'] = {} return metadata except Exception as e: logger.error(f"Error getting metadata for hash {file_hash}: {e}", exc_info=True) return None + + def set_relationship_by_hash(self, file_hash: str, related_file_hash: str, rel_type: str = "alt", *, bidirectional: bool = True) -> None: + """Set a relationship between two files by hash. + + This is the store/hash-first API. It avoids any dependency on local filesystem + paths and only requires that both hashes exist in the DB. 
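+
+        Illustrative example:
+            set_relationship_by_hash(hash_a, hash_b, "alt") appends hash_b under
+            relationships["alt"] for hash_a, and vice versa when bidirectional=True.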
+ """ + try: + file_hash = str(file_hash or "").strip().lower() + related_file_hash = str(related_file_hash or "").strip().lower() + rel_type = str(rel_type or "alt").strip() or "alt" + + if not file_hash or not related_file_hash: + raise ValueError("Missing file hash for relationship") + if file_hash == related_file_hash: + return + + cursor = self.connection.cursor() + + # Ensure both hashes exist in files table (metadata has FK to files) + cursor.execute("SELECT 1 FROM files WHERE hash = ?", (file_hash,)) + if not cursor.fetchone(): + raise ValueError(f"Hash not found in store DB: {file_hash}") + cursor.execute("SELECT 1 FROM files WHERE hash = ?", (related_file_hash,)) + if not cursor.fetchone(): + raise ValueError(f"Hash not found in store DB: {related_file_hash}") + + # Load current relationships for the main file + cursor.execute("SELECT relationships FROM metadata WHERE hash = ?", (file_hash,)) + row = cursor.fetchone() + relationships_str = row[0] if row else None + + try: + relationships = json.loads(relationships_str) if relationships_str else {} + except (json.JSONDecodeError, TypeError): + relationships = {} + if not isinstance(relationships, dict): + relationships = {} + + relationships.setdefault(rel_type, []) + if not isinstance(relationships[rel_type], list): + relationships[rel_type] = [] + if related_file_hash not in relationships[rel_type]: + relationships[rel_type].append(related_file_hash) + + cursor.execute( + """ + INSERT INTO metadata (hash, relationships) + VALUES (?, ?) + ON CONFLICT(hash) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + """, + (file_hash, json.dumps(relationships)), + ) + + if bidirectional: + # Update the related file as well + cursor.execute("SELECT relationships FROM metadata WHERE hash = ?", (related_file_hash,)) + row2 = cursor.fetchone() + relationships_str2 = row2[0] if row2 else None + try: + reverse_relationships = json.loads(relationships_str2) if relationships_str2 else {} + except (json.JSONDecodeError, TypeError): + reverse_relationships = {} + if not isinstance(reverse_relationships, dict): + reverse_relationships = {} + + reverse_relationships.setdefault(rel_type, []) + if not isinstance(reverse_relationships[rel_type], list): + reverse_relationships[rel_type] = [] + if file_hash not in reverse_relationships[rel_type]: + reverse_relationships[rel_type].append(file_hash) + + cursor.execute( + """ + INSERT INTO metadata (hash, relationships) + VALUES (?, ?) + ON CONFLICT(hash) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + """, + (related_file_hash, json.dumps(reverse_relationships)), + ) + + self.connection.commit() + except Exception as e: + logger.error(f"Error setting relationship by hash: {e}", exc_info=True) + raise + + def find_files_pointing_to_hash(self, target_hash: str) -> List[Dict[str, Any]]: + """Find all files that have a relationship pointing to the target hash.""" + try: + target_hash = str(target_hash or "").strip().lower() + if not target_hash: + return [] + + cursor = self.connection.cursor() + + cursor.execute( + """ + SELECT f.hash, f.file_path, m.relationships + FROM metadata m + JOIN files f ON m.hash = f.hash + WHERE m.relationships LIKE ? 
+ """, + (f"%{target_hash}%",), + ) + + results: List[Dict[str, Any]] = [] + for row in cursor.fetchall(): + src_hash = row[0] + src_path = row[1] + rels_json = row[2] + try: + rels = json.loads(rels_json) if rels_json else {} + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(rels, dict): + continue + for r_type, hashes in rels.items(): + if not isinstance(hashes, list): + continue + if target_hash in [str(h or "").strip().lower() for h in hashes]: + results.append({ + "hash": src_hash, + "path": src_path, + "type": r_type, + }) + return results + except Exception as e: + logger.error(f"Error finding files pointing to hash {target_hash}: {e}", exc_info=True) + return [] def save_metadata(self, file_path: Path, metadata: Dict[str, Any]) -> None: """Save metadata for a file.""" @@ -961,7 +1102,7 @@ class API_folder_store: logger.error(f"Error updating metadata for hash {file_hash}: {e}", exc_info=True) raise - def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: + def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt", *, bidirectional: bool = True) -> None: """Set a relationship between two local files. Args: @@ -1018,47 +1159,50 @@ class API_folder_store: logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})") - # Set reverse relationship (bidirectional) - # For 'alt' and 'related', the reverse is the same - # For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does) - # Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates' - reverse_type = rel_type - - # Update the related file - cursor.execute(""" - SELECT relationships FROM metadata WHERE hash = ? - """, (related_file_hash,)) - - row = cursor.fetchone() - relationships_str = row[0] if row else None - - try: - if relationships_str: - reverse_relationships = json.loads(relationships_str) - else: + if bidirectional: + # Set reverse relationship (bidirectional) + # For 'alt' and 'related', the reverse is the same + # For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does) + # Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates' + reverse_type = rel_type + + # Update the related file + cursor.execute(""" + SELECT relationships FROM metadata WHERE hash = ? + """, (related_file_hash,)) + + row = cursor.fetchone() + relationships_str = row[0] if row else None + + try: + if relationships_str: + reverse_relationships = json.loads(relationships_str) + else: + reverse_relationships = {} + except (json.JSONDecodeError, TypeError): reverse_relationships = {} - except (json.JSONDecodeError, TypeError): - reverse_relationships = {} - - if not isinstance(reverse_relationships, dict): - reverse_relationships = {} - - if reverse_type not in reverse_relationships: - reverse_relationships[reverse_type] = [] - - if file_hash not in reverse_relationships[reverse_type]: - reverse_relationships[reverse_type].append(file_hash) - - # Save the updated reverse relationships - cursor.execute(""" - INSERT INTO metadata (hash, relationships) - VALUES (?, ?) 
- ON CONFLICT(hash) DO UPDATE SET - relationships = excluded.relationships, - time_modified = CURRENT_TIMESTAMP - """, (related_file_hash, json.dumps(reverse_relationships))) - - self.connection.commit() + + if not isinstance(reverse_relationships, dict): + reverse_relationships = {} + + if reverse_type not in reverse_relationships: + reverse_relationships[reverse_type] = [] + + if file_hash not in reverse_relationships[reverse_type]: + reverse_relationships[reverse_type].append(file_hash) + + # Save the updated reverse relationships + cursor.execute(""" + INSERT INTO metadata (hash, relationships) + VALUES (?, ?) + ON CONFLICT(hash) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP + """, (related_file_hash, json.dumps(reverse_relationships))) + + self.connection.commit() + else: + self.connection.commit() except Exception as e: logger.error(f"Error setting relationship: {e}", exc_info=True) @@ -1074,44 +1218,22 @@ class API_folder_store: List of dicts with {path, type} for files pointing to target """ try: - # Get the hash of the target file - target_hash = sha256_file(target_path) + # Prefer the DB's stored identity hash for the target. + target_hash = None + try: + target_hash = self.get_file_hash(target_path) + except Exception: + target_hash = None + + # Fall back to hashing bytes if the path isn't known to the DB. + if not target_hash: + target_hash = sha256_file(target_path) + if not target_hash: logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash") return [] - - cursor = self.connection.cursor() - - # Scan all metadata (this might be slow on huge DBs but fine for local library) - # We select file_path and relationships json - cursor.execute(""" - SELECT f.file_path, m.relationships - FROM metadata m - JOIN files f ON m.hash = f.hash - WHERE m.relationships LIKE ? - """, (f"%{target_hash}%",)) - - results = [] - - for row in cursor.fetchall(): - f_path = row[0] - rels_json = row[1] - - try: - rels = json.loads(rels_json) - if isinstance(rels, dict): - for r_type, hashes in rels.items(): - if isinstance(hashes, list): - # Check if target hash is in this relationship type - if target_hash in hashes: - results.append({ - "path": f_path, - "type": r_type - }) - except (json.JSONDecodeError, TypeError): - continue - - return results + + return self.find_files_pointing_to_hash(target_hash) except Exception as e: logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True) return [] @@ -2630,7 +2752,7 @@ class LocalLibrarySearchOptimizer: return None return self.db.search_hash(file_hash) - def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: + def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt", *, bidirectional: bool = True) -> None: """Set a relationship between two files in the database. Delegates to LocalLibraryDB.set_relationship(). 
@@ -2642,7 +2764,7 @@ class LocalLibrarySearchOptimizer: """ if not self.db: return - self.db.set_relationship(file_path, related_file_path, rel_type) + self.db.set_relationship(file_path, related_file_path, rel_type, bidirectional=bidirectional) def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]: """Find all files that have a relationship pointing to the target path.""" diff --git a/CLI.py b/CLI.py index da7e441..7672226 100644 --- a/CLI.py +++ b/CLI.py @@ -207,16 +207,70 @@ def _load_cli_config() -> Dict[str, Any]: return {} -def _get_table_title_for_command(cmd_name: str, emitted_items: Optional[List[Any]] = None) -> str: +def _get_table_title_for_command( + cmd_name: str, + emitted_items: Optional[List[Any]] = None, + cmd_args: Optional[List[str]] = None, +) -> str: """Generate a dynamic table title based on the command and emitted items. Args: cmd_name: The command name (e.g., 'search-file', 'get-tag', 'get-file') emitted_items: The items being displayed + cmd_args: Arguments passed to the command (when available) Returns: A descriptive title for the result table """ + # Prefer argument-aware titles where possible so table history is self-describing. + if cmd_name in ('search-provider', 'search_provider') and cmd_args: + # Support both positional form: + # search-provider + # and flag form: + # search-provider -provider + provider: str = "" + query: str = "" + tokens = [str(a) for a in (cmd_args or [])] + pos: List[str] = [] + i = 0 + while i < len(tokens): + low = tokens[i].lower() + if low in {"-provider", "--provider"} and i + 1 < len(tokens): + provider = str(tokens[i + 1]).strip() + i += 2 + continue + if low in {"-query", "--query"} and i + 1 < len(tokens): + query = str(tokens[i + 1]).strip() + i += 2 + continue + if low in {"-limit", "--limit"} and i + 1 < len(tokens): + i += 2 + continue + if not str(tokens[i]).startswith("-"): + pos.append(str(tokens[i])) + i += 1 + + if not provider and pos: + provider = str(pos[0]).strip() + pos = pos[1:] + if not query and pos: + query = " ".join(pos).strip() + + if not provider or not query: + # Fall back to generic mapping below. 
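+            # (When both are present, e.g. ['-provider', 'libgen', 'dune'], the title becomes 'Libgen: dune'.)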
+ provider = "" + query = "" + + provider_lower = provider.lower() + if provider_lower == 'youtube': + provider_label = 'Youtube' + elif provider_lower == 'openlibrary': + provider_label = 'OpenLibrary' + else: + provider_label = provider[:1].upper() + provider[1:] if provider else 'Provider' + if provider and query: + return f"{provider_label}: {query}".strip().rstrip(':') + # Mapping of commands to title templates title_map = { 'search-file': 'Results', @@ -518,6 +572,24 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: # Dynamic search providers if normalized_arg == "provider": try: + canonical_cmd = (cmd_name or "").replace("_", "-").lower() + + # cmdlet-aware provider choices: + # - search-provider: search providers + # - add-file: file providers (0x0, matrix) + if canonical_cmd in {"search-provider"}: + from ProviderCore.registry import list_search_providers + providers = list_search_providers(_load_cli_config()) + available = [name for name, is_ready in providers.items() if is_ready] + return sorted(available) if available else sorted(providers.keys()) + + if canonical_cmd in {"add-file"}: + from ProviderCore.registry import list_file_providers + providers = list_file_providers(_load_cli_config()) + available = [name for name, is_ready in providers.items() if is_ready] + return sorted(available) if available else sorted(providers.keys()) + + # Default behavior (legacy): merge search providers and metadata providers. from ProviderCore.registry import list_search_providers providers = list_search_providers(_load_cli_config()) available = [name for name, is_ready in providers.items() if is_ready] @@ -570,6 +642,7 @@ if ( """Generate completions for the current input.""" text = document.text_before_cursor tokens = text.split() + ends_with_space = bool(text) and text[-1].isspace() # Respect pipeline stages: only use tokens after the last '|' last_pipe = -1 @@ -586,6 +659,31 @@ if ( # Single token at this stage -> suggest command names/keywords if len(stage_tokens) == 1: current = stage_tokens[0].lower() + + # If the user has finished typing the command and added a space, + # complete that command's flags (or sub-choices) instead of command names. 
+ if ends_with_space: + cmd_name = current.replace("_", "-") + if cmd_name in {"help"}: + for cmd in self.cmdlet_names: + yield CompletionType(cmd, start_position=0) + return + + arg_names = _get_cmdlet_args(cmd_name) + logical_seen: Set[str] = set() + for arg in arg_names: + arg_low = arg.lower() + if arg_low.startswith("--"): + continue + logical = arg.lstrip("-").lower() + if logical in logical_seen: + continue + yield CompletionType(arg, start_position=0) + logical_seen.add(logical) + + yield CompletionType("-help", start_position=0) + return + for cmd in self.cmdlet_names: if cmd.startswith(current): yield CompletionType(cmd, start_position=-len(current)) @@ -596,8 +694,12 @@ if ( # Otherwise treat first token of stage as command and complete its args cmd_name = stage_tokens[0].replace("_", "-").lower() - current_token = stage_tokens[-1].lower() - prev_token = stage_tokens[-2].lower() if len(stage_tokens) > 1 else "" + if ends_with_space: + current_token = "" + prev_token = stage_tokens[-1].lower() + else: + current_token = stage_tokens[-1].lower() + prev_token = stage_tokens[-2].lower() if len(stage_tokens) > 1 else "" choices = _get_arg_choices(cmd_name, prev_token) if choices: @@ -611,22 +713,29 @@ if ( for arg in arg_names: arg_low = arg.lower() + prefer_single_dash = current_token in {"", "-"} + # If the user has only typed '-', prefer single-dash flags (e.g. -url) # and avoid suggesting both -name and --name for the same logical arg. - if current_token == "-" and arg_low.startswith("--"): + if prefer_single_dash and arg_low.startswith("--"): continue logical = arg.lstrip("-").lower() - if current_token == "-" and logical in logical_seen: + if prefer_single_dash and logical in logical_seen: continue if arg_low.startswith(current_token): yield CompletionType(arg, start_position=-len(current_token)) - if current_token == "-": + if prefer_single_dash: logical_seen.add(logical) - if "--help".startswith(current_token): - yield CompletionType("--help", start_position=-len(current_token)) + # Help completion: prefer -help unless user explicitly starts '--' + if current_token.startswith("--"): + if "--help".startswith(current_token): + yield CompletionType("--help", start_position=-len(current_token)) + else: + if "-help".startswith(current_token): + yield CompletionType("-help", start_position=-len(current_token)) async def get_completions_async(self, document: Document, complete_event): # type: ignore[override] for completion in self.get_completions(document, complete_event): @@ -681,6 +790,45 @@ def _create_cmdlet_cli(): return None app = typer.Typer(help="Medeia-Macina CLI") + + def _complete_search_provider(ctx, param, incomplete: str): # pragma: no cover + """Shell completion for --provider values on the Typer search-provider command.""" + try: + import click + from click.shell_completion import CompletionItem + except Exception: + return [] + + try: + from ProviderCore.registry import list_search_providers + providers = list_search_providers(_load_cli_config()) + available = [n for n, ok in (providers or {}).items() if ok] + choices = sorted(available) if available else sorted((providers or {}).keys()) + except Exception: + choices = [] + + inc = (incomplete or "").lower() + out = [] + for name in choices: + if not name: + continue + if name.lower().startswith(inc): + out.append(CompletionItem(name)) + return out + + @app.command("search-provider") + def search_provider( + provider: str = typer.Option( + ..., "--provider", "-p", + help="Provider name (bandcamp, libgen, soulseek, 
youtube)", + shell_complete=_complete_search_provider, + ), + query: str = typer.Argument(..., help="Search query (quote for spaces)"), + limit: int = typer.Option(50, "--limit", "-l", help="Maximum results to return"), + ): + """Search external providers (Typer wrapper around the cmdlet).""" + # Delegate to the existing cmdlet so behavior stays consistent. + _execute_cmdlet("search-provider", ["-provider", provider, query, "-limit", str(limit)]) @app.command("pipeline") def pipeline( @@ -804,6 +952,18 @@ def _create_cmdlet_cli(): block = provider_cfg.get(str(name).strip().lower()) return isinstance(block, dict) and bool(block) + def _ping_url(url: str, timeout: float = 3.0) -> tuple[bool, str]: + try: + from API.HTTP import HTTPClient + + with HTTPClient(timeout=timeout, retries=1) as client: + resp = client.get(url, allow_redirects=True) + code = int(getattr(resp, "status_code", 0) or 0) + ok = 200 <= code < 500 + return ok, f"{url} (HTTP {code})" + except Exception as exc: + return False, f"{url} ({type(exc).__name__})" + # Load config and initialize debug logging config = {} try: @@ -894,6 +1054,169 @@ def _create_cmdlet_cli(): detail = (url_val + (" - " if url_val else "")) + (err or "Unavailable") _add_startup_check(status, name_key, "hydrusnetwork", detail) + # Configured providers (dynamic): show any [provider=...] blocks. + # This complements store checks and avoids hardcoding per-provider rows. + provider_cfg = config.get("provider") if isinstance(config, dict) else None + if isinstance(provider_cfg, dict) and provider_cfg: + try: + from ProviderCore.registry import ( + list_search_providers, + list_file_providers, + ) + except Exception: + list_search_providers = None # type: ignore + list_file_providers = None # type: ignore + + try: + from Provider.metadata_provider import list_metadata_providers + except Exception: + list_metadata_providers = None # type: ignore + + search_availability = {} + file_availability = {} + meta_availability = {} + + try: + if list_search_providers is not None: + search_availability = list_search_providers(config) or {} + except Exception: + search_availability = {} + + try: + if list_file_providers is not None: + file_availability = list_file_providers(config) or {} + except Exception: + file_availability = {} + + try: + if list_metadata_providers is not None: + meta_availability = list_metadata_providers(config) or {} + except Exception: + meta_availability = {} + + def _provider_display_name(key: str) -> str: + k = (key or "").strip() + low = k.lower() + if low == "openlibrary": + return "OpenLibrary" + if low == "alldebrid": + return "AllDebrid" + if low == "youtube": + return "YouTube" + return k[:1].upper() + k[1:] if k else "Provider" + + # Avoid duplicating the existing Matrix row. 
+ already_checked = {"matrix"} + + def _default_provider_ping_targets(provider_key: str) -> list[str]: + prov = (provider_key or "").strip().lower() + if prov == "openlibrary": + return ["https://openlibrary.org"] + if prov == "youtube": + return ["https://www.youtube.com"] + if prov == "bandcamp": + return ["https://bandcamp.com"] + if prov == "libgen": + try: + from Provider.libgen import MIRRORS + + mirrors = [str(x).rstrip("/") for x in (MIRRORS or []) if str(x).strip()] + return [m + "/json.php" for m in mirrors] + except Exception: + return [] + return [] + + def _ping_first(urls: list[str]) -> tuple[bool, str]: + for u in urls: + ok, detail = _ping_url(u) + if ok: + return True, detail + if urls: + ok, detail = _ping_url(urls[0]) + return ok, detail + return False, "No ping target" + + for provider_name in provider_cfg.keys(): + prov = str(provider_name or "").strip().lower() + if not prov or prov in already_checked: + continue + + display = _provider_display_name(prov) + + # Special-case AllDebrid to show a richer detail and validate connectivity. + if prov == "alldebrid": + try: + from Provider.alldebrid import _get_debrid_api_key # type: ignore + + api_key = _get_debrid_api_key(config) + if not api_key: + _add_startup_check("DISABLED", display, prov, "Not configured") + else: + from API.alldebrid import AllDebridClient + + client = AllDebridClient(api_key) + base_url = str(getattr(client, "base_url", "") or "").strip() + _add_startup_check("ENABLED", display, prov, base_url or "Connected") + except Exception as exc: + _add_startup_check("DISABLED", display, prov, str(exc)) + continue + + is_known = False + ok = None + + if prov in search_availability: + is_known = True + ok = bool(search_availability.get(prov)) + elif prov in file_availability: + is_known = True + ok = bool(file_availability.get(prov)) + elif prov in meta_availability: + is_known = True + ok = bool(meta_availability.get(prov)) + + if not is_known: + _add_startup_check("UNKNOWN", display, prov, "Not registered") + else: + # For non-login providers, include a lightweight URL reachability check. + detail = "Configured" if ok else "Not configured" + ping_targets = _default_provider_ping_targets(prov) + if ping_targets: + ping_ok, ping_detail = _ping_first(ping_targets) + if ok: + detail = ping_detail + else: + detail = (detail + " | " + ping_detail) if ping_detail else detail + _add_startup_check("ENABLED" if ok else "DISABLED", display, prov, detail) + + already_checked.add(prov) + + # Also show default non-login providers even if they aren't configured. + # This helps users know what's available/reachable out of the box. + default_search_providers = ["openlibrary", "libgen", "youtube", "bandcamp"] + for prov in default_search_providers: + if prov in already_checked: + continue + display = _provider_display_name(prov) + ok = bool(search_availability.get(prov)) if prov in search_availability else False + ping_targets = _default_provider_ping_targets(prov) + ping_ok, ping_detail = _ping_first(ping_targets) if ping_targets else (False, "No ping target") + detail = ping_detail if ping_detail else ("Available" if ok else "Unavailable") + # If the provider isn't even import/dep available, show that first. 
+ if not ok: + detail = ("Unavailable" + (f" | {ping_detail}" if ping_detail else "")) + _add_startup_check("ENABLED" if (ok and ping_ok) else "DISABLED", display, prov, detail) + already_checked.add(prov) + + # Default file providers (no login): 0x0 + if "0x0" not in already_checked: + ok = bool(file_availability.get("0x0")) if "0x0" in file_availability else False + ping_ok, ping_detail = _ping_url("https://0x0.st") + detail = ping_detail + if not ok: + detail = ("Unavailable" + (f" | {ping_detail}" if ping_detail else "")) + _add_startup_check("ENABLED" if (ok and ping_ok) else "DISABLED", "0x0", "0x0", detail) + already_checked.add("0x0") + if _has_provider(config, "matrix"): # Matrix availability is validated by Provider.matrix.Matrix.__init__. try: @@ -1397,9 +1720,9 @@ def _execute_pipeline(tokens: list): if table_for_stage: ctx.set_current_stage_table(table_for_stage) - # Special check for table-specific behavior BEFORE command expansion - # If we are selecting from a YouTube or Soulseek search, we want to force auto-piping to .pipe - # instead of trying to expand to a command (which search-file doesn't support well for re-execution) + # Special check for table-specific behavior BEFORE command expansion. + # For some provider tables, we prefer item-based selection over command expansion, + # and may auto-append a sensible follow-up stage (e.g. YouTube -> download-media). source_cmd = ctx.get_current_stage_table_source_command() source_args = ctx.get_current_stage_table_source_args() @@ -1409,7 +1732,7 @@ def _execute_pipeline(tokens: list): # Logic based on table type if table_type == 'youtube' or table_type == 'soulseek': - # Force fallback to item-based selection so we can auto-pipe + # Force fallback to item-based selection so we can auto-append a follow-up stage command_expanded = False # Skip the command expansion block below elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: @@ -1493,18 +1816,21 @@ def _execute_pipeline(tokens: list): if not stages: if table_type == 'youtube': - print(f"Auto-piping YouTube selection to .pipe") - stages.append(['.pipe']) + print(f"Auto-running YouTube selection via download-media") + stages.append(['download-media']) elif table_type == 'soulseek': print(f"Auto-piping Soulseek selection to download-file") stages.append(['download-file']) elif table_type == 'openlibrary': print(f"Auto-piping OpenLibrary selection to download-file") stages.append(['download-file']) + elif table_type == 'libgen': + print(f"Auto-piping Libgen selection to download-file") + stages.append(['download-file']) elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: # Legacy check - print(f"Auto-piping YouTube selection to .pipe") - stages.append(['.pipe']) + print(f"Auto-running YouTube selection via download-media") + stages.append(['download-media']) else: # If the user is piping a provider selection into additional stages (e.g. add-file), # automatically insert the appropriate download stage so @N is "logical". 
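+                # e.g. '@2 | add-file' on a Soulseek results table becomes
+                # '@2 | download-file | add-file' so the next stage gets a local file.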
@@ -1513,6 +1839,12 @@ def _execute_pipeline(tokens: list): if table_type == 'soulseek' and first_cmd not in ('download-file', 'download-media', 'download_media', '.pipe'): print(f"Auto-inserting download-file after Soulseek selection") stages.insert(0, ['download-file']) + if table_type == 'youtube' and first_cmd not in ('download-media', 'download_media', 'download-file', '.pipe'): + print(f"Auto-inserting download-media after YouTube selection") + stages.insert(0, ['download-media']) + if table_type == 'libgen' and first_cmd not in ('download-file', 'download-media', 'download_media', '.pipe'): + print(f"Auto-inserting download-file after Libgen selection") + stages.insert(0, ['download-file']) else: print(f"No items matched selection in pipeline\n") @@ -1567,6 +1899,15 @@ def _execute_pipeline(tokens: list): # Check if piped_result contains format objects and we have expansion info source_cmd = ctx.get_current_stage_table_source_command() source_args = ctx.get_current_stage_table_source_args() + + # If selecting from a YouTube results table and this is the last stage, + # auto-run download-media instead of leaving a bare selection. + current_table = ctx.get_current_stage_table() + table_type = current_table.table if current_table and hasattr(current_table, 'table') else None + if table_type == 'youtube' and stage_index + 1 >= len(stages): + print(f"Auto-running YouTube selection via download-media") + stages.append(['download-media', *stage_args]) + should_expand_to_command = False if source_cmd == '.pipe' or source_cmd == '.adjective': should_expand_to_command = True @@ -1574,11 +1915,11 @@ def _execute_pipeline(tokens: list): # When piping playlist rows to another cmdlet, prefer item-based selection should_expand_to_command = False elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: - # Special case for youtube search results: @N expands to .pipe + # Legacy behavior: selection at end should run a sensible follow-up. if stage_index + 1 >= len(stages): # Only auto-pipe if this is the last stage - print(f"Auto-piping YouTube selection to .pipe") - stages.append(['.pipe']) + print(f"Auto-running YouTube selection via download-media") + stages.append(['download-media']) # Force should_expand_to_command to False so we fall through to filtering should_expand_to_command = False @@ -1671,6 +2012,26 @@ def _execute_pipeline(tokens: list): piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0] print(f"Selected {len(filtered)} item(s) using {cmd_name}") + # If selecting YouTube results and there are downstream stages, + # insert download-media so subsequent cmdlets receive a local temp file. 
+ try: + current_table = ctx.get_current_stage_table() + table_type = current_table.table if current_table and hasattr(current_table, 'table') else None + except Exception: + table_type = None + + if table_type == 'youtube' and stage_index + 1 < len(stages): + next_cmd = stages[stage_index + 1][0] if stages[stage_index + 1] else None + if next_cmd not in ('download-media', 'download_media', 'download-file', '.pipe'): + print("Auto-inserting download-media after YouTube selection") + stages.insert(stage_index + 1, ['download-media']) + + if table_type == 'libgen' and stage_index + 1 < len(stages): + next_cmd = stages[stage_index + 1][0] if stages[stage_index + 1] else None + if next_cmd not in ('download-file', 'download-media', 'download_media', '.pipe'): + print("Auto-inserting download-file after Libgen selection") + stages.insert(stage_index + 1, ['download-file']) + # If selection is the last stage and looks like a provider result, # auto-initiate the borrow/download flow. if stage_index + 1 >= len(stages): @@ -1699,6 +2060,11 @@ def _execute_pipeline(tokens: list): if provider is not None: print("Auto-downloading selection via download-file") stages.append(["download-file"]) + else: + # Fallback: if we know the current table type, prefer a sensible default. + if table_type == 'libgen': + print("Auto-downloading Libgen selection via download-file") + stages.append(["download-file"]) continue else: print(f"No items matched selection {cmd_name}\n") @@ -1719,6 +2085,13 @@ def _execute_pipeline(tokens: list): pipeline_status = "failed" pipeline_error = f"Unknown command {cmd_name}" return + + # Prevent stale tables (e.g., a previous download-media format picker) + # from leaking into subsequent stages and being displayed again. + try: + ctx.set_current_stage_table(None) + except Exception: + pass debug(f"[pipeline] Stage {stage_index}: cmd_name={cmd_name}, cmd_fn type={type(cmd_fn)}, piped_result type={type(piped_result)}, stage_args={stage_args}") @@ -1758,7 +2131,7 @@ def _execute_pipeline(tokens: list): if is_last_stage: # Last stage - display results if RESULT_TABLE_AVAILABLE and ResultTable is not None and pipeline_ctx.emits: - table_title = _get_table_title_for_command(cmd_name, pipeline_ctx.emits) + table_title = _get_table_title_for_command(cmd_name, pipeline_ctx.emits, stage_args) # Only set source_command for search/filter commands (not display-only or action commands) # This preserves context so @N refers to the original search, not intermediate results @@ -1776,7 +2149,8 @@ def _execute_pipeline(tokens: list): self_managing_commands = { 'get-tag', 'get_tag', 'tags', 'get-url', 'get_url', - 'search-file', 'search_file' + 'search-file', 'search_file', + 'search-provider', 'search_provider' } overlay_table = ctx.get_display_table() if hasattr(ctx, 'get_display_table') else None @@ -1858,6 +2232,24 @@ def _execute_pipeline(tokens: list): else: # No output from this stage. If it presented a selectable table (e.g., format list), pause # and stash the remaining pipeline so @N can resume with the selection applied. + if is_last_stage: + # Last stage with no emitted items: only display a *current* selectable table set by + # the cmdlet (e.g., download-media format picker). Do NOT fall back to last_result_table, + # which may be stale from a previous command. 
+ stage_table_source = ctx.get_current_stage_table_source_command() + row_has_selection = ctx.get_current_stage_table_row_selection_args(0) is not None + stage_table = ctx.get_current_stage_table() + if not stage_table and hasattr(ctx, 'get_display_table'): + stage_table = ctx.get_display_table() + + if RESULT_TABLE_AVAILABLE and stage_table is not None and stage_table_source and row_has_selection: + try: + print() + print(stage_table.format_plain()) + except Exception: + pass + continue + if not is_last_stage: stage_table_source = ctx.get_current_stage_table_source_command() row_has_selection = ctx.get_current_stage_table_row_selection_args(0) is not None @@ -2016,39 +2408,69 @@ def _execute_cmdlet(cmd_name: str, args: list): # Load config relative to CLI root config = _load_cli_config() - # Check for @ selection syntax in arguments - # Extract @N, @N-M, @{N,M,P} syntax and remove from args - filtered_args = [] - selected_indices = [] - - for arg in args: - if arg.startswith('@'): - # Parse selection: @2, @2-5, @{1,3,5} - selection_str = arg[1:] # Remove @ - try: - if '{' in selection_str and '}' in selection_str: - # @{1,3,5} format - selection_str = selection_str.strip('{}') - selected_indices = [int(x.strip()) - 1 for x in selection_str.split(',')] - elif '-' in selection_str: - # @2-5 format - parts = selection_str.split('-') - start = int(parts[0]) - 1 - end = int(parts[1]) - selected_indices = list(range(start, end)) - else: - # @2 format - selected_indices = [int(selection_str) - 1] - except (ValueError, IndexError): - # Invalid format, treat as regular arg - # Special case: @"string" should be treated as "string" (stripping @) - # This allows adding new items via @"New Item" syntax - if selection_str.startswith('"') or selection_str.startswith("'"): - filtered_args.append(selection_str.strip('"\'')) - else: - filtered_args.append(arg) - else: + # Check for @ selection syntax in arguments. + # IMPORTANT: support using @N as a VALUE for a value-taking flag (e.g. add-relationship -king @1). + # Only treat @ tokens as selection when they are NOT in a value position. + filtered_args: list[str] = [] + selected_indices: list[int] = [] + select_all = False + + # Build a set of flag tokens that consume a value for this cmdlet. + # We use cmdlet metadata so we don't break patterns like: get-tag -raw @1 (where -raw is a flag). + value_flags: set[str] = set() + try: + meta = _catalog_get_cmdlet_metadata(cmd_name) + raw = meta.get("raw") if isinstance(meta, dict) else None + arg_specs = getattr(raw, "arg", None) if raw is not None else None + if isinstance(arg_specs, list): + for spec in arg_specs: + try: + spec_type = str(getattr(spec, "type", "string") or "string").strip().lower() + if spec_type == "flag": + continue + spec_name = str(getattr(spec, "name", "") or "") + canonical = spec_name.lstrip("-").strip() + if not canonical: + continue + value_flags.add(f"-{canonical}".lower()) + value_flags.add(f"--{canonical}".lower()) + alias = str(getattr(spec, "alias", "") or "").strip() + if alias: + value_flags.add(f"-{alias}".lower()) + except Exception: + continue + except Exception: + value_flags = set() + + for i, arg in enumerate(args): + if isinstance(arg, str) and arg.startswith('@'): + prev = str(args[i - 1]).lower() if i > 0 else "" + # If this @ token is the value for a value-taking flag, keep it. 
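+                # e.g. in 'add-relationship -king @1', '@1' stays attached to -king
+                # instead of being consumed as a table selection.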
+ if prev in value_flags: + filtered_args.append(arg) + continue + + # Special case: @"string" should be treated as "string" (stripping @) + # This allows adding new items via @"New Item" syntax + if len(arg) >= 2 and (arg[1] == '"' or arg[1] == "'"): + filtered_args.append(arg[1:].strip('"\'')) + continue + + # Parse selection: @2, @2-5, @{1,3,5}, @3,5,7, @3-6,8, @* + if arg.strip() == "@*": + select_all = True + continue + + selection = _parse_selection_syntax(arg) + if selection is not None: + zero_based = sorted(i - 1 for i in selection if isinstance(i, int) and i > 0) + selected_indices.extend([idx for idx in zero_based if idx not in selected_indices]) + continue + + # Not a valid selection, treat as regular arg filtered_args.append(arg) + else: + filtered_args.append(str(arg)) # Get piped items from previous command results piped_items = ctx.get_last_result_items() @@ -2056,7 +2478,9 @@ def _execute_cmdlet(cmd_name: str, args: list): # Create result object - pass full list (or filtered list if @ selection used) to cmdlet result = None if piped_items: - if selected_indices: + if select_all: + result = piped_items + elif selected_indices: # Filter to selected indices only result = [piped_items[idx] for idx in selected_indices if 0 <= idx < len(piped_items)] else: @@ -2101,7 +2525,7 @@ def _execute_cmdlet(cmd_name: str, args: list): ctx.set_last_result_items_only(pipeline_ctx.emits) else: # Try to format as a table if we have search results - table_title = _get_table_title_for_command(cmd_name, pipeline_ctx.emits) + table_title = _get_table_title_for_command(cmd_name, pipeline_ctx.emits, filtered_args) # Only set source_command for search/filter commands (not display-only or action commands) # This preserves context so @N refers to the original search, not intermediate results @@ -2118,7 +2542,8 @@ def _execute_cmdlet(cmd_name: str, args: list): # Commands that manage their own table/history state (e.g. get-tag) self_managing_commands = { 'get-tag', 'get_tag', 'tags', - 'search-file', 'search_file' + 'search-file', 'search_file', + 'search-provider', 'search_provider' } if cmd_name in self_managing_commands: diff --git a/MPV/mpv_ipc.py b/MPV/mpv_ipc.py index 8c0b79f..ed801ad 100644 --- a/MPV/mpv_ipc.py +++ b/MPV/mpv_ipc.py @@ -33,6 +33,33 @@ _LYRIC_LOG_FH: Optional[Any] = None _MPV_AVAILABILITY_CACHE: Optional[Tuple[bool, Optional[str]]] = None +def _windows_hidden_subprocess_kwargs() -> Dict[str, Any]: + """Best-effort kwargs to avoid flashing console windows on Windows. + + Applies to subprocess.run/check_output/Popen. + """ + if platform.system() != "Windows": + return {} + + kwargs: Dict[str, Any] = {} + try: + create_no_window = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000) + kwargs["creationflags"] = int(create_no_window) + except Exception: + pass + + # Also set startupinfo to hidden, for APIs that honor it. + try: + si = subprocess.STARTUPINFO() + si.dwFlags |= subprocess.STARTF_USESHOWWINDOW + si.wShowWindow = subprocess.SW_HIDE + kwargs["startupinfo"] = si + except Exception: + pass + + return kwargs + + def _check_mpv_availability() -> Tuple[bool, Optional[str]]: """Return (available, reason) for the mpv executable. 
@@ -57,6 +84,7 @@ def _check_mpv_availability() -> Tuple[bool, Optional[str]]: capture_output=True, text=True, timeout=2, + **_windows_hidden_subprocess_kwargs(), ) if result.returncode == 0: _MPV_AVAILABILITY_CACHE = (True, None) @@ -97,6 +125,7 @@ def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]: stderr=subprocess.DEVNULL, timeout=2, text=True, + **_windows_hidden_subprocess_kwargs(), ) except Exception: return [] @@ -141,6 +170,7 @@ def _windows_kill_pids(pids: List[int]) -> None: stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=2, + **_windows_hidden_subprocess_kwargs(), ) except Exception: continue @@ -384,6 +414,7 @@ class MPV: stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=2, + **_windows_hidden_subprocess_kwargs(), ) except Exception: return @@ -415,8 +446,20 @@ class MPV: cmd.extend([str(a) for a in extra_args if a]) kwargs: Dict[str, Any] = {} - if detached and platform.system() == "Windows": - kwargs["creationflags"] = 0x00000008 # DETACHED_PROCESS + if platform.system() == "Windows": + # Ensure we don't flash a console window when spawning mpv. + flags = 0 + try: + flags |= int(getattr(subprocess, "DETACHED_PROCESS", 0x00000008)) if detached else 0 + except Exception: + flags |= 0x00000008 if detached else 0 + try: + flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)) + except Exception: + flags |= 0x08000000 + kwargs["creationflags"] = flags + # startupinfo is harmless for GUI apps; helps hide flashes for console-subsystem builds. + kwargs.update({k: v for k, v in _windows_hidden_subprocess_kwargs().items() if k != "creationflags"}) debug("Starting MPV") subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs) diff --git a/Provider/alldebrid.py b/Provider/alldebrid.py new file mode 100644 index 0000000..5bec6ca --- /dev/null +++ b/Provider/alldebrid.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +import sys +from typing import Any, Dict, Iterable, List, Optional + +from ProviderCore.base import SearchProvider, SearchResult +from SYS.logger import log + + +def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]: + """Read AllDebrid API key from config. + + Preferred formats: + - config.conf provider block: + [provider=alldebrid] + api_key=... + -> config["provider"]["alldebrid"]["api_key"] + + - store-style debrid block: + config["store"]["debrid"]["all-debrid"]["api_key"] + + Falls back to some legacy keys if present. + """ + # 1) provider block: [provider=alldebrid] + provider = config.get("provider") + if isinstance(provider, dict): + entry = provider.get("alldebrid") + if isinstance(entry, dict): + for k in ("api_key", "apikey", "API_KEY", "APIKEY"): + val = entry.get(k) + if isinstance(val, str) and val.strip(): + return val.strip() + if isinstance(entry, str) and entry.strip(): + return entry.strip() + + # 2) store.debrid block (canonical for debrid store configuration) + try: + from config import get_debrid_api_key + + key = get_debrid_api_key(config, service="All-debrid") + return key.strip() if key else None + except Exception: + pass + + # Legacy fallback (kept permissive so older configs still work) + for legacy_key in ("alldebrid_api_key", "AllDebrid", "all_debrid_api_key"): + val = config.get(legacy_key) + if isinstance(val, str) and val.strip(): + return val.strip() + + return None + + +class AllDebrid(SearchProvider): + """Search provider for AllDebrid account content. 
+ + This provider lists and searches the files/magnets already present in the + user's AllDebrid account. + + Query behavior: + - "*" / "all" / "list": list recent files from ready magnets + - otherwise: substring match on file name OR magnet name, or exact magnet id + """ + + def validate(self) -> bool: + # Consider "available" when configured; actual API connectivity can vary. + return bool(_get_debrid_api_key(self.config or {})) + + @staticmethod + def _flatten_files(items: Any) -> Iterable[Dict[str, Any]]: + """Flatten AllDebrid magnet file tree into file dicts. + + API commonly returns: + - file: {n: name, s: size, l: link} + - folder: {n: name, e: [sub_items]} + + Some call sites in this repo also expect {name, size, link}, so we accept both. + """ + if not items: + return + if isinstance(items, dict): + items = [items] + if not isinstance(items, list): + return + + for node in items: + if not isinstance(node, dict): + continue + + children = node.get('e') or node.get('children') + if isinstance(children, list): + yield from AllDebrid._flatten_files(children) + continue + + name = node.get('n') or node.get('name') + link = node.get('l') or node.get('link') + if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip(): + yield node + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + q = (query or "").strip() + if not q: + return [] + + api_key = _get_debrid_api_key(self.config or {}) + if not api_key: + return [] + + view = None + if isinstance(filters, dict): + view = str(filters.get("view") or "").strip().lower() or None + view = view or "folders" + + try: + from API.alldebrid import AllDebridClient + + client = AllDebridClient(api_key) + except Exception as exc: + log(f"[alldebrid] Failed to init client: {exc}", file=sys.stderr) + return [] + + q_lower = q.lower() + needle = "" if q_lower in {"*", "all", "list"} else q_lower + + # Second-stage: list files for a specific magnet id. 
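+        # This path is typically reached with filters={"view": "files", "magnet_id": <id>}
+        # (magnet_id may also arrive via kwargs); otherwise the folders view below is used.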
+ if view == "files": + magnet_id_val = None + if isinstance(filters, dict): + magnet_id_val = filters.get("magnet_id") + if magnet_id_val is None: + magnet_id_val = kwargs.get("magnet_id") + + try: + magnet_id = int(magnet_id_val) + except Exception: + return [] + + magnet_status: Dict[str, Any] = {} + try: + magnet_status = client.magnet_status(magnet_id) + except Exception: + magnet_status = {} + + magnet_name = str(magnet_status.get('filename') or magnet_status.get('name') or magnet_status.get('hash') or f"magnet-{magnet_id}") + status_code = magnet_status.get('statusCode') + status_text = str(magnet_status.get('status') or "").strip() or "unknown" + ready = status_code == 4 or bool(magnet_status.get('ready')) + + if not ready: + return [ + SearchResult( + table="alldebrid", + title=magnet_name, + path=f"alldebrid:magnet:{magnet_id}", + detail=status_text, + annotations=["folder", "not-ready"], + media_kind="folder", + tag={"alldebrid", "folder", str(magnet_id), "not-ready"}, + columns=[ + ("Folder", magnet_name), + ("ID", str(magnet_id)), + ("Status", status_text), + ("Ready", "no"), + ], + full_metadata={"magnet": magnet_status, "magnet_id": magnet_id}, + ) + ] + + try: + files_result = client.magnet_links([magnet_id]) + magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {} + file_tree = magnet_files.get('files', []) if isinstance(magnet_files, dict) else [] + except Exception as exc: + log(f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}", file=sys.stderr) + file_tree = [] + + results: List[SearchResult] = [] + for file_node in self._flatten_files(file_tree): + file_name = str(file_node.get('n') or file_node.get('name') or '').strip() + file_url = str(file_node.get('l') or file_node.get('link') or '').strip() + file_size = file_node.get('s') or file_node.get('size') + if not file_name or not file_url: + continue + + if needle and needle not in file_name.lower(): + continue + + size_bytes: Optional[int] = None + try: + if isinstance(file_size, (int, float)): + size_bytes = int(file_size) + elif isinstance(file_size, str) and file_size.isdigit(): + size_bytes = int(file_size) + except Exception: + size_bytes = None + + results.append( + SearchResult( + table="alldebrid", + title=file_name, + path=file_url, + detail=magnet_name, + annotations=["file"], + media_kind="file", + size_bytes=size_bytes, + tag={"alldebrid", "file", str(magnet_id)}, + columns=[ + ("File", file_name), + ("Folder", magnet_name), + ("ID", str(magnet_id)), + ], + full_metadata={"magnet": magnet_status, "magnet_id": magnet_id, "file": file_node}, + ) + ) + if len(results) >= max(1, limit): + break + + return results + + # Default: folders view (magnets) + try: + magnets = client.magnet_list() or [] + except Exception as exc: + log(f"[alldebrid] Failed to list account magnets: {exc}", file=sys.stderr) + return [] + + wanted_id: Optional[int] = None + if needle.isdigit(): + try: + wanted_id = int(needle) + except Exception: + wanted_id = None + + results: List[SearchResult] = [] + for magnet in magnets: + if not isinstance(magnet, dict): + continue + + try: + magnet_id = int(magnet.get('id')) + except Exception: + continue + + magnet_name = str(magnet.get('filename') or magnet.get('name') or magnet.get('hash') or f"magnet-{magnet_id}") + magnet_name_lower = magnet_name.lower() + + status_text = str(magnet.get('status') or "").strip() or "unknown" + status_code = magnet.get('statusCode') + ready = status_code == 4 or bool(magnet.get('ready')) + + if wanted_id is 
not None: + if magnet_id != wanted_id: + continue + elif needle and (needle not in magnet_name_lower): + continue + + size_bytes: Optional[int] = None + try: + size_val = magnet.get('size') + if isinstance(size_val, (int, float)): + size_bytes = int(size_val) + elif isinstance(size_val, str) and size_val.isdigit(): + size_bytes = int(size_val) + except Exception: + size_bytes = None + + results.append( + SearchResult( + table="alldebrid", + title=magnet_name, + path=f"alldebrid:magnet:{magnet_id}", + detail=status_text, + annotations=["folder"], + media_kind="folder", + size_bytes=size_bytes, + tag={"alldebrid", "folder", str(magnet_id)} | ({"ready"} if ready else {"not-ready"}), + columns=[ + ("Folder", magnet_name), + ("ID", str(magnet_id)), + ("Status", status_text), + ("Ready", "yes" if ready else "no"), + ], + full_metadata={"magnet": magnet, "magnet_id": magnet_id}, + ) + ) + + if len(results) >= max(1, limit): + break + + return results diff --git a/Provider/libgen.py b/Provider/libgen.py index 303b6d4..e83381d 100644 --- a/Provider/libgen.py +++ b/Provider/libgen.py @@ -4,12 +4,15 @@ import logging import re import requests import sys +import time from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple -from urllib.parse import quote, urljoin, urlparse, unquote +from urllib.parse import urljoin, urlparse, unquote from ProviderCore.base import SearchProvider, SearchResult +from ProviderCore.download import sanitize_filename from SYS.logger import log +from models import ProgressBar # Optional dependencies @@ -33,6 +36,7 @@ class Libgen(SearchProvider): try: from cli_syntax import get_field, get_free_text, parse_query + from SYS.logger import is_debug_enabled parsed = parse_query(query) isbn = get_field(parsed, "isbn") @@ -42,16 +46,24 @@ class Libgen(SearchProvider): search_query = isbn or title or author or free_text or query + debug_info = None + try: + if is_debug_enabled(): + debug_info = lambda msg: log(msg, file=sys.stderr) + except Exception: + debug_info = None + books = search_libgen( search_query, limit=limit, + log_info=debug_info, log_error=lambda msg: log(msg, file=sys.stderr), ) results: List[SearchResult] = [] for idx, book in enumerate(books, 1): - title = book.get("title", "Unknown") - author = book.get("author", "Unknown") + title = str(book.get("title") or "").strip() or "Unknown" + author = str(book.get("author") or "").strip() or "Unknown" year = book.get("year", "Unknown") pages = book.get("pages") or book.get("pages_str") or "" extension = book.get("extension", "") or book.get("ext", "") @@ -104,10 +116,106 @@ class Libgen(SearchProvider): return [] def validate(self) -> bool: + # JSON-based searching can work without BeautifulSoup; HTML parsing is a fallback. + return True + + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: + """Download a LibGen SearchResult into output_dir. + + This is used by the download-file cmdlet when a provider item is piped. 
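+        Returns the downloaded Path on success, or None on failure.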
+ """ try: - return BeautifulSoup is not None + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + target = str(getattr(result, "path", "") or "") + md = getattr(result, "full_metadata", None) + if not isinstance(md, dict): + md = {} + + title = str(getattr(result, "title", "") or "").strip() + md5 = str(md.get("md5") or "").strip() + extension = str(md.get("extension") or "").strip().lstrip(".") + + if (not target) or target.startswith("libgen:"): + if md5 and re.fullmatch(r"[a-fA-F0-9]{32}", md5): + target = urljoin(MIRRORS[0], f"/ads.php?md5={md5}") + + if not target: + return None + + base_name = sanitize_filename(title or md5 or "libgen") + out_path = output_dir / base_name + if extension: + out_path = out_path.with_suffix(f".{extension}") + + if out_path.exists(): + stem = out_path.stem + suffix = out_path.suffix + counter = 1 + while out_path.exists() and counter < 200: + out_path = out_path.with_name(f"{stem}({counter}){suffix}") + counter += 1 + + # Show a progress bar on stderr (safe for pipelines). + progress_bar = ProgressBar() + start_time = time.time() + # Allow the first callback to print immediately. + last_progress_time = [0.0] + label = out_path.name + + def progress_callback(bytes_downloaded: int, content_length: int) -> None: + # Throttle updates to avoid flooding output. + now = time.time() + if now - last_progress_time[0] < 0.5: + return + + total = int(content_length) if content_length and content_length > 0 else None + downloaded = int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0 + elapsed = max(0.001, now - start_time) + speed = downloaded / elapsed + + eta_seconds = 0.0 + if total and total > 0 and speed > 0: + eta_seconds = max(0.0, float(total - downloaded) / float(speed)) + minutes, seconds = divmod(int(eta_seconds), 60) + hours, minutes = divmod(minutes, 60) + eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" if total else "?:?:?" + speed_str = progress_bar.format_bytes(speed) + "/s" + + percent_str = None + if total and total > 0: + percent = (downloaded / total) * 100.0 + percent_str = f"{percent:.1f}%" + + line = progress_bar.format_progress( + percent_str=percent_str, + downloaded=downloaded, + total=total, + speed_str=speed_str, + eta_str=eta_str, + ) + + # Prefix with filename for clarity when downloading multiple items. + if label: + line = f"{label} {line}" + + if getattr(sys.stderr, "isatty", lambda: True)(): + sys.stderr.write("\r" + line + " ") + sys.stderr.flush() + last_progress_time[0] = now + + ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback) + # Clear the in-place progress line. + if getattr(sys.stderr, "isatty", lambda: True)(): + sys.stderr.write("\r" + (" " * 180) + "\r") + sys.stderr.write("\n") + sys.stderr.flush() + if ok and final_path: + return Path(final_path) + return None except Exception: - return False + return None LogFn = Optional[Callable[[str], None]] @@ -116,18 +224,26 @@ ErrorFn = Optional[Callable[[str], None]] DEFAULT_TIMEOUT = 20.0 DEFAULT_LIMIT = 50 +# Keep LibGen searches responsive even if mirrors are blocked or slow. +# Note: requests' timeout doesn't always cover DNS stalls, but this prevents +# multi-mirror attempts from taking minutes. 
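+# Each mirror attempt gets a (connect, read) timeout tuple carved out of the remaining
+# budget; see LibgenSearch.search() below.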
+DEFAULT_SEARCH_TOTAL_TIMEOUT = 20.0 +DEFAULT_CONNECT_TIMEOUT = 4.0 +DEFAULT_READ_TIMEOUT = 10.0 + # Mirrors to try in order MIRRORS = [ + # Prefer .gl first (often most reachable/stable) + "https://libgen.gl", + "http://libgen.gl", + "https://libgen.li", + "http://libgen.li", "https://libgen.is", "https://libgen.rs", "https://libgen.st", "http://libgen.is", "http://libgen.rs", "http://libgen.st", - "https://libgen.li", # Different structure, fallback - "http://libgen.li", - "https://libgen.gl", # Different structure, fallback - "http://libgen.gl", ] logging.getLogger(__name__).setLevel(logging.INFO) @@ -147,28 +263,146 @@ class LibgenSearch: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" }) - def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]: - """Search LibGen mirrors.""" - if not BeautifulSoup: - logging.error("BeautifulSoup not installed. Cannot search LibGen.") + def _search_libgen_json( + self, + mirror: str, + query: str, + limit: int, + *, + timeout: Any = DEFAULT_TIMEOUT, + ) -> List[Dict[str, Any]]: + """Search libgen.rs/is/st JSON API when available. + + Many LibGen mirrors expose /json.php which is less brittle than scraping. + """ + url = f"{mirror}/json.php" + params = { + "req": query, + "res": max(1, min(100, int(limit) if limit else 50)), + "column": "def", + "phrase": 1, + } + + resp = self.session.get(url, params=params, timeout=timeout) + resp.raise_for_status() + + data = resp.json() + if not isinstance(data, list): return [] + results: List[Dict[str, Any]] = [] + for item in data: + if not isinstance(item, dict): + continue + + # LibGen JSON responses vary by mirror; accept several common keys. + raw_id = item.get("ID") or item.get("Id") or item.get("id") or "" + title = item.get("Title") or item.get("title") or "" + author = item.get("Author") or item.get("author") or "" + publisher = item.get("Publisher") or item.get("publisher") or "" + year = item.get("Year") or item.get("year") or "" + pages = item.get("Pages") or item.get("pages") or "" + language = item.get("Language") or item.get("language") or "" + size = item.get("Size") or item.get("size") or item.get("filesize") or "" + extension = item.get("Extension") or item.get("extension") or item.get("ext") or "" + md5 = item.get("MD5") or item.get("md5") or "" + + download_link = f"http://library.lol/main/{md5}" if md5 else "" + + results.append({ + "id": str(raw_id), + "title": str(title), + "author": str(author), + "publisher": str(publisher), + "year": str(year), + "pages": str(pages), + "language": str(language), + "filesize_str": str(size), + "extension": str(extension), + "md5": str(md5), + "mirror_url": download_link, + "cover": "", + }) + + if len(results) >= limit: + break + + return results + + def search( + self, + query: str, + limit: int = DEFAULT_LIMIT, + *, + total_timeout: float = DEFAULT_SEARCH_TOTAL_TIMEOUT, + log_info: LogFn = None, + log_error: ErrorFn = None, + ) -> List[Dict[str, Any]]: + """Search LibGen mirrors. + + Uses a total time budget across mirrors to avoid long hangs. + """ + # Prefer JSON API (no BeautifulSoup needed); HTML scraping is a fallback. 
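+        # JSON path: _search_libgen_json (GET {mirror}/json.php with req/res/column/phrase);
+        # HTML paths: _search_libgen_rs and _search_libgen_li.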
+ has_bs4 = BeautifulSoup is not None + + started = time.monotonic() + for mirror in MIRRORS: + elapsed = time.monotonic() - started + remaining = total_timeout - elapsed + if remaining <= 0: + _call(log_error, f"[libgen] Search timed out after {total_timeout:.0f}s") + break + + # Bound each request so we can try multiple mirrors within the budget. + # Keep connect+read within the remaining budget as a best-effort. + connect_timeout = min(DEFAULT_CONNECT_TIMEOUT, max(0.1, remaining)) + read_budget = max(0.1, remaining - connect_timeout) + read_timeout = min(DEFAULT_READ_TIMEOUT, read_budget) + request_timeout: Any = (connect_timeout, read_timeout) + + _call(log_info, f"[libgen] Trying mirror: {mirror}") + try: - if "libgen.li" in mirror or "libgen.gl" in mirror: - results = self._search_libgen_li(mirror, query, limit) - else: - results = self._search_libgen_rs(mirror, query, limit) + # Try JSON first on *all* mirrors (including .gl/.li), then fall back to HTML scraping. + results: List[Dict[str, Any]] = [] + try: + results = self._search_libgen_json(mirror, query, limit, timeout=request_timeout) + except Exception: + results = [] + + if not results: + if not has_bs4: + continue + + if "libgen.li" in mirror or "libgen.gl" in mirror: + results = self._search_libgen_li(mirror, query, limit, timeout=request_timeout) + else: + results = self._search_libgen_rs(mirror, query, limit, timeout=request_timeout) if results: + _call(log_info, f"[libgen] Using mirror: {mirror}") return results + except requests.exceptions.Timeout: + _call(log_info, f"[libgen] Mirror timed out: {mirror}") + continue + except requests.exceptions.RequestException: + _call(log_info, f"[libgen] Mirror request failed: {mirror}") + continue except Exception as e: logging.debug(f"Mirror {mirror} failed: {e}") continue return [] - def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: + def _search_libgen_rs( + self, + mirror: str, + query: str, + limit: int, + *, + timeout: Any = DEFAULT_TIMEOUT, + ) -> List[Dict[str, Any]]: """Search libgen.rs/is/st style mirrors.""" url = f"{mirror}/search.php" params = { @@ -180,9 +414,11 @@ class LibgenSearch: "phrase": 1, } - resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) + resp = self.session.get(url, params=params, timeout=timeout) resp.raise_for_status() + if BeautifulSoup is None: + return [] soup = BeautifulSoup(resp.text, "html.parser") table = soup.find("table", {"class": "c"}) @@ -215,7 +451,7 @@ class LibgenSearch: md5 = "" if title_tag and title_tag.has_attr("href"): - href = title_tag["href"] + href = str(title_tag.get("href") or "") match = re.search(r"md5=([a-fA-F0-9]{32})", href) if match: md5 = match.group(1) @@ -264,19 +500,29 @@ class LibgenSearch: return results - def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: + def _search_libgen_li( + self, + mirror: str, + query: str, + limit: int, + *, + timeout: Any = DEFAULT_TIMEOUT, + ) -> List[Dict[str, Any]]: """Search libgen.li/gl style mirrors.""" url = f"{mirror}/index.php" params = { "req": query, - "res": 100, - "covers": "on", + # Keep the request lightweight; covers slow the HTML response. 
+ "res": max(1, min(100, int(limit) if limit else 50)), + "covers": "off", "filesuns": "all", } - resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) + resp = self.session.get(url, params=params, timeout=timeout) resp.raise_for_status() + if BeautifulSoup is None: + return [] soup = BeautifulSoup(resp.text, "html.parser") table = soup.find("table", {"id": "tablelibgen"}) if not table: @@ -294,46 +540,152 @@ class LibgenSearch: continue try: - title_col = cols[1] - title_link = title_col.find("a") - title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True) - - libgen_id = "" - if title_link and title_link.has_attr("href"): - href = title_link["href"] - match = re.search(r"id=(\d+)", href) - if match: - libgen_id = match.group(1) - - authors = cols[2].get_text(strip=True) - publisher = cols[3].get_text(strip=True) - year = cols[4].get_text(strip=True) - language = cols[5].get_text(strip=True) - pages = cols[6].get_text(strip=True) - size = cols[7].get_text(strip=True) - extension = cols[8].get_text(strip=True) - + # Extract md5 (libgen.gl exposes /ads.php?md5=... in mirror column) + md5 = "" mirror_url = "" - if title_link: - href = title_link["href"] - if href.startswith("/"): - mirror_url = mirror + href - else: - mirror_url = urljoin(mirror, href) + for a in row.find_all("a"): + href = a.get("href") + if not href: + continue + m = re.search(r"md5=([a-fA-F0-9]{32})", str(href)) + if m: + md5 = m.group(1) + if "ads.php" in str(href): + mirror_url = urljoin(mirror, str(href)) + break + if not mirror_url and md5: + mirror_url = urljoin(mirror, f"/ads.php?md5={md5}") - results.append({ - "id": libgen_id, - "title": title, - "author": authors, - "publisher": publisher, - "year": year, - "pages": pages, - "language": language, - "filesize_str": size, - "extension": extension, - "md5": "", - "mirror_url": mirror_url, - }) + # Extract numeric file id from /file.php?id=... + libgen_id = "" + file_link = row.find("a", href=re.compile(r"/file\.php\?id=\d+")) + if file_link and file_link.get("href"): + m = re.search(r"id=(\d+)", str(file_link.get("href"))) + if m: + libgen_id = m.group(1) + + title = "" + authors = "" + publisher = "" + year = "" + language = "" + pages = "" + size = "" + extension = "" + isbn = "" + + # libgen.gl columns shift depending on whether covers are enabled. + # With covers on: cover, meta, author, publisher, year, language, pages, size, ext, mirrors (10) + # With covers off: meta, author, publisher, year, language, pages, size, ext, mirrors (9) + offset: Optional[int] = None + if len(cols) >= 10: + offset = 1 + elif len(cols) >= 9: + offset = 0 + + if offset is not None: + meta_cell = cols[offset] + meta_text = " ".join([str(s).strip() for s in meta_cell.stripped_strings if str(s).strip()]) + + # Extract ISBNs from meta cell (avoid using them as title) + # Matches 10 or 13-digit ISBN with optional leading 978/979. + isbn_candidates = re.findall(r"\b(?:97[89])?\d{9}[\dXx]\b", meta_text) + if isbn_candidates: + seen: List[str] = [] + for s in isbn_candidates: + s = s.upper() + if s not in seen: + seen.append(s) + isbn = "; ".join(seen) + + # Choose a "real" title from meta cell. + # libgen.gl meta can include series/edition/isbn blobs; prefer text with letters. 
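+                    # Illustrative (hypothetical) cell strings: "Some Series", "9781234567890",
+                    # "Actual Book Title" -> the scoring below picks "Actual Book Title"
+                    # (most letters, fewest digits).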
+ raw_candidates: List[str] = [] + for a in meta_cell.find_all("a"): + t = a.get_text(" ", strip=True) + if t: + raw_candidates.append(t) + for s in meta_cell.stripped_strings: + t = str(s).strip() + if t: + raw_candidates.append(t) + + deduped: List[str] = [] + for t in raw_candidates: + t = t.strip() + if t and t not in deduped: + deduped.append(t) + + def _looks_like_isbn_blob(text: str) -> bool: + if re.fullmatch(r"[0-9Xx;\s\-]+", text): + # Numbers-only (common for ISBN lists) + return True + if ";" in text and len(re.findall(r"[A-Za-z]", text)) == 0: + return True + return False + + best_title = "" + best_score: Optional[tuple] = None + for cand in deduped: + low = cand.lower().strip() + if low in {"cover", "edition"}: + continue + if _looks_like_isbn_blob(cand): + continue + + letters = len(re.findall(r"[A-Za-z]", cand)) + if letters < 3: + continue + + digits = len(re.findall(r"\d", cand)) + digit_ratio = digits / max(1, len(cand)) + # Prefer more letters, fewer digits, and longer strings. + score = (letters, -digit_ratio, len(cand)) + if best_score is None or score > best_score: + best_score = score + best_title = cand + + title = best_title or meta_cell.get_text(" ", strip=True) + + authors = cols[offset + 1].get_text(" ", strip=True) + publisher = cols[offset + 2].get_text(" ", strip=True) + year = cols[offset + 3].get_text(" ", strip=True) + language = cols[offset + 4].get_text(" ", strip=True) + pages = cols[offset + 5].get_text(" ", strip=True) + size = cols[offset + 6].get_text(" ", strip=True) + extension = cols[offset + 7].get_text(" ", strip=True) + else: + # Older fallback structure + title_col = cols[1] + title_link = title_col.find("a") + title = title_link.get_text(" ", strip=True) if title_link else title_col.get_text(" ", strip=True) + authors = cols[2].get_text(" ", strip=True) + publisher = cols[3].get_text(" ", strip=True) + year = cols[4].get_text(" ", strip=True) + language = cols[5].get_text(" ", strip=True) + pages = cols[6].get_text(" ", strip=True) + size = cols[7].get_text(" ", strip=True) + extension = cols[8].get_text(" ", strip=True) + + title = (title or "").strip() or "Unknown" + authors = (authors or "").strip() or "Unknown" + + results.append( + { + "id": libgen_id, + "title": title, + "author": authors, + "isbn": (isbn or "").strip(), + "publisher": (publisher or "").strip(), + "year": (year or "").strip(), + "pages": (pages or "").strip(), + "language": (language or "").strip(), + "filesize_str": (size or "").strip(), + "extension": (extension or "").strip(), + "md5": md5, + "mirror_url": mirror_url, + } + ) if len(results) >= limit: break @@ -354,7 +706,13 @@ def search_libgen( """Search Libgen using the robust scraper.""" searcher = LibgenSearch(session=session) try: - results = searcher.search(query, limit=limit) + results = searcher.search( + query, + limit=limit, + total_timeout=DEFAULT_SEARCH_TOTAL_TIMEOUT, + log_info=log_info, + log_error=log_error, + ) _call(log_info, f"[libgen] Found {len(results)} results") return results except Exception as e: @@ -371,6 +729,17 @@ def _resolve_download_url( current_url = url visited = set() + if BeautifulSoup is None: + _call(log_info, "[resolve] BeautifulSoup not available; cannot resolve HTML download chain") + return None + + def _find_a_by_text(pattern: str) -> Optional[Any]: + for a in soup.find_all("a"): + t = a.get_text(" ", strip=True) + if t and re.search(pattern, t, re.IGNORECASE): + return a + return None + for _ in range(6): if current_url in visited: break @@ -396,45 +765,40 @@ def 
_resolve_download_url( soup = BeautifulSoup(content, "html.parser") - get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE)) - if not get_link: - h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE)) - if h2_get and h2_get.parent.name == "a": - get_link = h2_get.parent - + get_link = _find_a_by_text(r"^GET$") if get_link and get_link.has_attr("href"): - return urljoin(current_url, get_link["href"]) + return urljoin(current_url, str(get_link.get("href") or "")) if "series.php" in current_url: edition_link = soup.find("a", href=re.compile(r"edition\.php")) if edition_link: - current_url = urljoin(current_url, edition_link["href"]) + current_url = urljoin(current_url, str(edition_link.get("href") or "")) continue if "edition.php" in current_url: file_link = soup.find("a", href=re.compile(r"file\.php")) if file_link: - current_url = urljoin(current_url, file_link["href"]) + current_url = urljoin(current_url, str(file_link.get("href") or "")) continue if "file.php" in current_url: libgen_link = soup.find("a", title="libgen") if not libgen_link: - libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE)) + libgen_link = _find_a_by_text(r"Libgen") if libgen_link and libgen_link.has_attr("href"): - current_url = urljoin(current_url, libgen_link["href"]) + current_url = urljoin(current_url, str(libgen_link.get("href") or "")) continue if "ads.php" in current_url: get_php_link = soup.find("a", href=re.compile(r"get\.php")) if get_php_link: - return urljoin(current_url, get_php_link["href"]) + return urljoin(current_url, str(get_php_link.get("href") or "")) for text in ["Cloudflare", "IPFS.io", "Infura"]: - link = soup.find("a", string=re.compile(text, re.IGNORECASE)) + link = _find_a_by_text(re.escape(text)) if link and link.has_attr("href"): - return urljoin(current_url, link["href"]) + return urljoin(current_url, str(link.get("href") or "")) break diff --git a/Provider/matrix.py b/Provider/matrix.py index 7499b12..29b0157 100644 --- a/Provider/matrix.py +++ b/Provider/matrix.py @@ -1,8 +1,11 @@ from __future__ import annotations import mimetypes +import time +import uuid from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple +from urllib.parse import quote import requests @@ -57,17 +60,18 @@ class Matrix(FileProvider): matrix_conf = self.config.get("provider", {}).get("matrix", {}) if isinstance(self.config, dict) else {} homeserver = matrix_conf.get("homeserver") - room_id = matrix_conf.get("room_id") access_token = matrix_conf.get("access_token") password = matrix_conf.get("password") # Not configured: keep instance but mark invalid via validate(). - if not (homeserver and room_id and (access_token or password)): + # Note: `room_id` is intentionally NOT required, since the CLI can prompt + # the user to select a room dynamically. 
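+        # (room selection is handled by list_rooms()/upload_to_room() further below)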
+ if not (homeserver and (access_token or password)): self._init_ok = None self._init_reason = None return - cache_key = f"{_normalize_homeserver(str(homeserver))}|room:{room_id}|has_token:{bool(access_token)}" + cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}" cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key) if cached is None: ok, reason = _matrix_health_check(homeserver=str(homeserver), access_token=str(access_token) if access_token else None) @@ -88,34 +92,69 @@ class Matrix(FileProvider): matrix_conf = self.config.get("provider", {}).get("matrix", {}) return bool( matrix_conf.get("homeserver") - and matrix_conf.get("room_id") and (matrix_conf.get("access_token") or matrix_conf.get("password")) ) - def upload(self, file_path: str, **kwargs: Any) -> str: - path = Path(file_path) - if not path.exists(): - raise FileNotFoundError(f"File not found: {file_path}") - + def _get_homeserver_and_token(self) -> Tuple[str, str]: matrix_conf = self.config.get("provider", {}).get("matrix", {}) homeserver = matrix_conf.get("homeserver") access_token = matrix_conf.get("access_token") - room_id = matrix_conf.get("room_id") - if not homeserver: raise Exception("Matrix homeserver missing") if not access_token: raise Exception("Matrix access_token missing") + base = _normalize_homeserver(str(homeserver)) + if not base: + raise Exception("Matrix homeserver missing") + return base, str(access_token) + + def list_rooms(self) -> List[Dict[str, Any]]: + """Return the rooms the current user has joined. + + Uses `GET /_matrix/client/v3/joined_rooms`. + """ + base, token = self._get_homeserver_and_token() + headers = {"Authorization": f"Bearer {token}"} + resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10) + if resp.status_code != 200: + raise Exception(f"Matrix joined_rooms failed: {resp.text}") + data = resp.json() or {} + rooms = data.get("joined_rooms") or [] + out: List[Dict[str, Any]] = [] + for rid in rooms: + if not isinstance(rid, str) or not rid.strip(): + continue + room_id = rid.strip() + name = "" + # Best-effort room name lookup (safe to fail). 
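+            # Reads the room's m.room.name state event; rooms without a name keep name = "".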
+ try: + encoded = quote(room_id, safe="") + name_resp = requests.get( + f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name", + headers=headers, + timeout=5, + ) + if name_resp.status_code == 200: + payload = name_resp.json() or {} + maybe = payload.get("name") + if isinstance(maybe, str): + name = maybe + except Exception: + pass + out.append({"room_id": room_id, "name": name}) + return out + + def upload_to_room(self, file_path: str, room_id: str) -> str: + """Upload a file and send it to a specific room.""" + path = Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") if not room_id: raise Exception("Matrix room_id missing") - if not homeserver.startswith("http"): - homeserver = f"https://{homeserver}" - - # Upload media - upload_url = f"{homeserver}/_matrix/media/v3/upload" + base, token = self._get_homeserver_and_token() headers = { - "Authorization": f"Bearer {access_token}", + "Authorization": f"Bearer {token}", "Content-Type": "application/octet-stream", } @@ -125,27 +164,22 @@ class Matrix(FileProvider): filename = path.name + # Upload media + upload_url = f"{base}/_matrix/media/v3/upload" with open(path, "rb") as handle: resp = requests.post(upload_url, headers=headers, data=handle, params={"filename": filename}) - if resp.status_code != 200: raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get("content_uri") + content_uri = (resp.json() or {}).get("content_uri") if not content_uri: raise Exception("No content_uri returned") - # Send message - send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" - # Determine message type msgtype = "m.file" ext = path.suffix.lower() - audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"} video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"} image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"} - if ext in audio_exts: msgtype = "m.audio" elif ext in video_exts: @@ -156,9 +190,21 @@ class Matrix(FileProvider): info = {"mimetype": mime_type, "size": path.stat().st_size} payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info} - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") + # Correct Matrix client API send endpoint requires a transaction ID. 
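+        # i.e. PUT /_matrix/client/v3/rooms/{roomId}/send/m.room.message/{txnId}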
+ txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}" + encoded_room = quote(str(room_id), safe="") + send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}" + send_headers = {"Authorization": f"Bearer {token}"} + send_resp = requests.put(send_url, headers=send_headers, json=payload) + if send_resp.status_code != 200: + raise Exception(f"Matrix send message failed: {send_resp.text}") - event_id = resp.json().get("event_id") - return f"https://matrix.to/#/{room_id}/{event_id}" + event_id = (send_resp.json() or {}).get("event_id") + return f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}" + + def upload(self, file_path: str, **kwargs: Any) -> str: + matrix_conf = self.config.get("provider", {}).get("matrix", {}) + room_id = matrix_conf.get("room_id") + if not room_id: + raise Exception("Matrix room_id missing") + return self.upload_to_room(file_path, str(room_id)) diff --git a/Provider/soulseek.py b/Provider/soulseek.py index 2ad7cf0..61ce508 100644 --- a/Provider/soulseek.py +++ b/Provider/soulseek.py @@ -182,6 +182,20 @@ class Soulseek(SearchProvider): DOWNLOAD_DIR = "./downloads" MAX_WAIT_TRANSFER = 1200 + def __init__(self, config: Optional[Dict[str, Any]] = None): + super().__init__(config) + try: + from config import get_soulseek_username, get_soulseek_password + + user = get_soulseek_username(self.config) + pwd = get_soulseek_password(self.config) + if user: + Soulseek.USERNAME = user + if pwd: + Soulseek.PASSWORD = pwd + except Exception: + pass + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: """Download file from Soulseek.""" @@ -433,8 +447,16 @@ class Soulseek(SearchProvider): def validate(self) -> bool: try: from aioslsk.client import SoulSeekClient # noqa: F401 + # Require configured credentials. + try: + from config import get_soulseek_username, get_soulseek_password - return True + user = get_soulseek_username(self.config) + pwd = get_soulseek_password(self.config) + return bool(user and pwd) + except Exception: + # Fall back to legacy class defaults if config helpers aren't available. 
+ return bool(Soulseek.USERNAME and Soulseek.PASSWORD) except ImportError: return False @@ -444,6 +466,9 @@ async def download_soulseek_file( filename: str, output_dir: Path = Path("./downloads"), timeout: int = 1200, + *, + client_username: Optional[str] = None, + client_password: Optional[str] = None, ) -> Optional[Path]: """Download a file from a Soulseek peer.""" @@ -471,14 +496,19 @@ async def download_soulseek_file( output_path = output_path.resolve() - settings = Settings(credentials=CredentialsSettings(username=Soulseek.USERNAME, password=Soulseek.PASSWORD)) + login_user = (client_username or Soulseek.USERNAME or "").strip() + login_pass = (client_password or Soulseek.PASSWORD or "").strip() + if not login_user or not login_pass: + raise RuntimeError("Soulseek credentials not configured (set provider=soulseek username/password)") + + settings = Settings(credentials=CredentialsSettings(username=login_user, password=login_pass)) client = SoulSeekClient(settings) with _suppress_aioslsk_noise(): try: await client.start() await client.login() - debug(f"[soulseek] Logged in as {Soulseek.USERNAME}") + debug(f"[soulseek] Logged in as {login_user}") debug(f"[soulseek] Requesting download from {username}: {filename}") diff --git a/ProviderCore/registry.py b/ProviderCore/registry.py index f7f5b7a..df6896a 100644 --- a/ProviderCore/registry.py +++ b/ProviderCore/registry.py @@ -12,6 +12,7 @@ import sys from SYS.logger import log from ProviderCore.base import FileProvider, SearchProvider, SearchResult +from Provider.alldebrid import AllDebrid from Provider.bandcamp import Bandcamp from Provider.libgen import Libgen from Provider.matrix import Matrix @@ -22,6 +23,7 @@ from Provider.zeroxzero import ZeroXZero _SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = { + "alldebrid": AllDebrid, "libgen": Libgen, "openlibrary": OpenLibrary, "soulseek": Soulseek, diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index 58ae0c7..d5a4f31 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -179,7 +179,7 @@ class HydrusNetwork(Store): try: metadata = client.fetch_file_metadata(hashes=[file_hash]) if metadata and isinstance(metadata, dict): - files = metadata.get("file_metadata", []) + files = metadata.get("metadata", []) if files: file_exists = True log( @@ -376,12 +376,12 @@ class HydrusNetwork(Store): if not pattern or pattern == "*": metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100) else: - # Fast-path: exact URL via /add_url/get_url_files when a full URL is provided. + # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided. try: if pattern.startswith("http://") or pattern.startswith("https://"): from API.HydrusNetwork import HydrusRequestSpec - spec = HydrusRequestSpec(method="GET", endpoint="/add_url/get_url_files", query={"url": pattern}) + spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": pattern}) response = client._perform_request(spec) # type: ignore[attr-defined] hashes: list[str] = [] file_ids: list[int] = [] @@ -479,8 +479,8 @@ class HydrusNetwork(Store): if not isinstance(metadata_list, list): metadata_list = [] - - for meta in metadata_list: + + for meta in metadata_list: if len(results) >= limit: break @@ -527,14 +527,14 @@ class HydrusNetwork(Store): top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) _collect(top_level_tags) - # Resolve extension from MIME type + # Prefer Hydrus-provided extension (e.g. 
".webm"); fall back to MIME map. mime_type = meta.get("mime") - ext = "" - if mime_type: + ext = str(meta.get("ext") or "").strip().lstrip('.') + if not ext and mime_type: for category in mime_maps.values(): for _ext_key, info in category.items(): if mime_type in info.get("mimes", []): - ext = info.get("ext", "").lstrip('.') + ext = str(info.get("ext", "")).strip().lstrip('.') break if ext: break @@ -605,8 +605,11 @@ class HydrusNetwork(Store): raise def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None: - """Open file in browser via Hydrus client API URL.""" - import webbrowser + """Return a browser URL for the file. + + IMPORTANT: this method must be side-effect free (do not auto-open a browser). + Only explicit user actions (e.g. the get-file cmdlet) should open files. + """ debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...") @@ -614,13 +617,6 @@ class HydrusNetwork(Store): base_url = str(self.URL).rstrip('/') access_key = str(self.API) browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" - debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}") - - # Open in default browser - webbrowser.open(browser_url) - debug(f"[HydrusNetwork.get_file] Browser opened successfully") - - # Return the URL string instead of downloading debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}") return browser_url @@ -664,24 +660,28 @@ class HydrusNetwork(Store): if title != f"Hydrus_{file_hash[:12]}": break - # Determine extension from mime type + # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map if needed. mime_type = meta.get("mime", "") - ext = "" - if mime_type: - from SYS.utils_constant import mime_maps - for _category, extensions in mime_maps.items(): - for extension, mime in extensions.items(): - if mime == mime_type: - ext = extension.lstrip(".") + ext_raw = meta.get("ext") + ext = str(ext_raw or "").strip().lstrip(".") + if not ext and mime_type: + try: + from SYS.utils_constant import mime_maps + for category in mime_maps.values(): + for _ext_key, info in category.items(): + if mime_type in info.get("mimes", []): + ext = str(info.get("ext", "")).strip().lstrip(".") + break + if ext: break - if ext: - break + except Exception: + ext = "" return { "hash": file_hash, "title": title, "ext": ext, - "size": meta.get("size", 0), + "size": meta.get("size"), "mime": mime_type, } @@ -804,13 +804,31 @@ class HydrusNetwork(Store): if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return [] - payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True) + payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=False) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return [] - meta = items[0] - url = meta.get("url") or [] - return list(url) + meta = items[0] if isinstance(items[0], dict) else {} + + raw_urls: Any = ( + meta.get("known_urls") + or meta.get("urls") + or meta.get("url") + or [] + ) + if isinstance(raw_urls, str): + val = raw_urls.strip() + return [val] if val else [] + if isinstance(raw_urls, list): + out: list[str] = [] + for u in raw_urls: + if not isinstance(u, str): + continue + u = u.strip() + if u: + out.append(u) + return out + return [] except Exception as exc: debug(f"Hydrus get_url failed: {exc}") return [] diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index 8f8c998..f24a41f 100644 --- 
a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -1123,12 +1123,12 @@ class DownloadModal(ModalScreen): selection: Selection string like "1-3" or "1,3,5" merge_enabled: Whether to merge the PDFs """ - # Check if PyPDF2 is available for merge (needed at function start) + # Check if pypdf is available for merge (needed at function start) try: - from PyPDF2 import PdfWriter, PdfReader - HAS_PYPDF2 = True + from pypdf import PdfWriter, PdfReader + HAS_PYPDF = True except ImportError: - HAS_PYPDF2 = False + HAS_PYPDF = False PdfWriter = None PdfReader = None @@ -1192,11 +1192,11 @@ class DownloadModal(ModalScreen): # Merge PDFs if requested if merge_enabled and len(downloaded_files) > 1: - if not HAS_PYPDF2: - logger.error("PyPDF2 not available for PDF merge") + if not HAS_PYPDF: + logger.error("pypdf not available for PDF merge") self.app.call_from_thread( self.app.notify, - "PyPDF2 required for PDF merge. Install with: pip install PyPDF2", + "pypdf required for PDF merge. Install with: pip install pypdf", title="Missing Dependency", severity="error" ) diff --git a/cmdlet/_shared.py b/cmdlet/_shared.py index 6abc0a5..d3dbfba 100644 --- a/cmdlet/_shared.py +++ b/cmdlet/_shared.py @@ -627,7 +627,14 @@ def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name return normalize_hash(hash_value) -def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optional[Dict[str, Any]], Optional[int]]: +def fetch_hydrus_metadata( + config: Any, + hash_hex: str, + *, + store_name: Optional[str] = None, + hydrus_client: Any = None, + **kwargs, +) -> tuple[Optional[Dict[str, Any]], Optional[int]]: """Fetch metadata from Hydrus for a given hash, consolidating common fetch pattern. Eliminates repeated boilerplate: client initialization, error handling, metadata extraction. @@ -635,6 +642,8 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona Args: config: Configuration object (passed to hydrus_wrapper.get_client) hash_hex: File hash to fetch metadata for + store_name: Optional Hydrus store name. When provided, do not fall back to a global/default Hydrus client. + hydrus_client: Optional explicit Hydrus client. When provided, takes precedence. **kwargs: Additional arguments to pass to client.fetch_file_metadata() Common: include_service_keys_to_tags, include_notes, include_file_url, include_duration, etc. @@ -646,15 +655,33 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona from API import HydrusNetwork hydrus_wrapper = HydrusNetwork - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return None, 1 - + client = hydrus_client if client is None: - log("Hydrus client unavailable") - return None, 1 + if store_name: + # Store specified: do not fall back to a global/default Hydrus client. 
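+            # The Hydrus client is resolved from Store(config)[store_name]._client instead.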
+ try: + from Store import Store + store = Store(config) + backend = store[str(store_name)] + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "fetch_file_metadata"): + client = candidate + except Exception as exc: + log(f"Hydrus client unavailable for store '{store_name}': {exc}") + client = None + if client is None: + log(f"Hydrus client unavailable for store '{store_name}'") + return None, 1 + else: + try: + client = hydrus_wrapper.get_client(config) + except Exception as exc: + log(f"Hydrus client unavailable: {exc}") + return None, 1 + + if client is None: + log("Hydrus client unavailable") + return None, 1 try: payload = client.fetch_file_metadata(hashes=[hash_hex], **kwargs) diff --git a/cmdlet/add_file.py b/cmdlet/add_file.py index 9aae609..76bc9bf 100644 --- a/cmdlet/add_file.py +++ b/cmdlet/add_file.py @@ -5,6 +5,7 @@ from pathlib import Path import sys import shutil import tempfile +import re import models import pipeline as ctx @@ -65,6 +66,15 @@ class Add_File(Cmdlet): stage_ctx = ctx.get_stage_context() is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) + # Determine if -store targets a registered backend (vs a filesystem export path). + is_storage_backend_location = False + if location: + try: + store_probe = Store(config) + is_storage_backend_location = location in (store_probe.list_backends() or []) + except Exception: + is_storage_backend_location = False + # Decide which items to process. # - If user provided -path, treat this invocation as single-item. # - Otherwise, if piped input is a list, ingest each item. @@ -81,13 +91,15 @@ class Add_File(Cmdlet): debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}") collected_payloads: List[Dict[str, Any]] = [] + pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {} successes = 0 failures = 0 - # Only run the search-store refresh when add-file is the last stage. - # In the middle of a pipeline, downstream cmdlets should receive the emitted - # storage payload directly (no need to re-search and risk duplicate emits). - auto_search_store_after_add = bool(is_last_stage) and len(items_to_process) == 1 + # When add-file -store is the last stage, always show a final search-store table. + # This is especially important for multi-item ingests (e.g., multi-clip downloads) + # so the user always gets a selectable ResultTable. + want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location) + auto_search_store_after_add = False for item in items_to_process: pipe_obj = coerce_to_pipe_object(item, path_arg) @@ -217,7 +229,8 @@ class Add_File(Cmdlet): config, delete_after_item, collect_payloads=collected_payloads, - suppress_last_stage_overlay=is_last_stage and len(items_to_process) > 1, + collect_relationship_pairs=pending_relationship_pairs, + suppress_last_stage_overlay=want_final_search_store, auto_search_store=auto_search_store_after_add, ) else: @@ -243,16 +256,38 @@ class Add_File(Cmdlet): except Exception: pass - # If we processed multiple storage ingests, present a single consolidated overlay table. - if is_last_stage and len(items_to_process) > 1 and collected_payloads: + # Always end add-file -store (when last stage) by showing the canonical store table. + # This keeps output consistent and ensures @N selection works for multi-item ingests. 
+ if want_final_search_store and collected_payloads: try: - from result_table import ResultTable - - table = ResultTable("Result") + hashes: List[str] = [] for payload in collected_payloads: - table.add_result(payload) - # Make this the active selectable table so @.. returns here (and playlist table is kept in history). - ctx.set_last_result_table(table, collected_payloads, subject=collected_payloads) + h = payload.get("hash") if isinstance(payload, dict) else None + if isinstance(h, str) and len(h) == 64: + hashes.append(h) + # Deduplicate while preserving order + seen: set[str] = set() + hashes = [h for h in hashes if not (h in seen or seen.add(h))] + + refreshed_items = Add_File._try_emit_search_store_by_hashes( + store=str(location), + hash_values=hashes, + config=config, + ) + if not refreshed_items: + # Fallback: at least show the add-file payloads as a display overlay + from result_table import ResultTable + table = ResultTable("Result") + for payload in collected_payloads: + table.add_result(payload) + ctx.set_last_result_table_overlay(table, collected_payloads, subject=collected_payloads) + except Exception: + pass + + # Persist relationships into backend DB/API. + if pending_relationship_pairs: + try: + Add_File._apply_pending_relationships(pending_relationship_pairs, config) except Exception: pass @@ -260,6 +295,259 @@ class Add_File(Cmdlet): return 0 return 1 + @staticmethod + def _try_emit_search_store_by_hashes(*, store: str, hash_values: List[str], config: Dict[str, Any]) -> Optional[List[Any]]: + """Run search-store for a list of hashes and promote the table to a display overlay. + + Returns the emitted search-store payload items on success, else None. + """ + hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64] + if not store or not hashes: + return None + + try: + from cmdlet.search_store import CMDLET as search_store_cmdlet + + query = "hash:" + ",".join(hashes) + args = ["-store", str(store), query] + log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr) + + # Run search-store under a temporary stage context so its ctx.emit() calls + # don't interfere with the outer add-file pipeline stage. + prev_ctx = ctx.get_stage_context() + temp_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=getattr(prev_ctx, "worker_id", None)) + ctx.set_stage_context(temp_ctx) + try: + code = search_store_cmdlet.run(None, args, config) + emitted_items = list(getattr(temp_ctx, "emits", []) or []) + finally: + ctx.set_stage_context(prev_ctx) + + if code != 0: + return None + + # Promote the search-store result to a display overlay so the CLI prints it + # for action commands like add-file. + stage_ctx = ctx.get_stage_context() + is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) + if is_last: + try: + table = ctx.get_last_result_table() + items = ctx.get_last_result_items() + if table is not None and items: + ctx.set_last_result_table_overlay(table, items, subject={"store": store, "hash": hashes}) + except Exception: + pass + + return emitted_items + except Exception as exc: + debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}") + return None + + @staticmethod + def _parse_relationship_tag_king_alts(tag_value: str) -> tuple[Optional[str], List[str]]: + """Parse a relationship tag into (king_hash, alt_hashes). + + Supported formats: + - New: relationship: ,, + - Old: relationship: hash(king),hash(alt)... 
+ relationship: hash(king)KING,hash(alt)ALT + + For the local DB we treat the first hash listed as the king. + """ + if not isinstance(tag_value, str): + return None, [] + + raw = tag_value.strip() + if not raw: + return None, [] + + # Normalize input: ensure we only look at the RHS after "relationship:" + rhs = raw + if ":" in raw: + prefix, rest = raw.split(":", 1) + if prefix.strip().lower() == "relationship": + rhs = rest.strip() + + # Old typed format: hash(type)HEX + typed = re.findall(r"hash\((\w+)\)?", rhs) + if typed: + king: Optional[str] = None + alts: List[str] = [] + for rel_type, h in typed: + h_norm = str(h).strip().lower() + if rel_type.strip().lower() == "king": + king = h_norm + elif rel_type.strip().lower() in {"alt", "related"}: + alts.append(h_norm) + # If the tag omitted king but had hashes, fall back to first hash. + if not king: + all_hashes = [str(h).strip().lower() for _, h in typed] + king = all_hashes[0] if all_hashes else None + alts = [h for h in all_hashes[1:] if h] + # Dedupe alts while preserving order + seen: set[str] = set() + alts = [h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h))] + if king and len(king) == 64: + return king, [h for h in alts if h != king] + return None, [] + + # New format: a simple list of hashes, first is king. + hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs) + hashes = [h.strip().lower() for h in hashes if isinstance(h, str)] + if not hashes: + return None, [] + king = hashes[0] + alts = hashes[1:] + seen2: set[str] = set() + alts = [h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h))] + return king, [h for h in alts if h != king] + + @staticmethod + def _parse_relationships_king_alts(relationships: Dict[str, Any]) -> tuple[Optional[str], List[str]]: + """Parse a PipeObject.relationships dict into (king_hash, alt_hashes). + + Supported shapes: + - {"king": [KING], "alt": [ALT1, ALT2]} + - {"king": KING, "alt": ALT} (strings) + - Also treats "related" hashes as alts for persistence purposes. + """ + if not isinstance(relationships, dict) or not relationships: + return None, [] + + def _first_hash(val: Any) -> Optional[str]: + if isinstance(val, str): + h = val.strip().lower() + return h if len(h) == 64 else None + if isinstance(val, list): + for item in val: + if isinstance(item, str): + h = item.strip().lower() + if len(h) == 64: + return h + return None + + def _many_hashes(val: Any) -> List[str]: + out: List[str] = [] + if isinstance(val, str): + h = val.strip().lower() + if len(h) == 64: + out.append(h) + elif isinstance(val, list): + for item in val: + if isinstance(item, str): + h = item.strip().lower() + if len(h) == 64: + out.append(h) + return out + + king = _first_hash(relationships.get("king")) + if not king: + return None, [] + + alts = _many_hashes(relationships.get("alt")) + alts.extend(_many_hashes(relationships.get("related"))) + + seen: set[str] = set() + alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))] + return king, alts + + @staticmethod + def _apply_pending_relationships(pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any]) -> None: + """Persist relationships into the appropriate backend DB/API. + + - Folder stores: write to the per-store SQLite DB (directional alt->king). + - Hydrus stores: call Hydrus relationship API. 
+ """ + if not pending: + return + + try: + store = Store(config) + except Exception: + return + + for backend_name, pairs in pending.items(): + if not pairs: + continue + + try: + backend = store[str(backend_name)] + except Exception: + continue + + backend_type = type(backend).__name__.lower() + + # Folder-backed local DB + location_fn = getattr(backend, "location", None) + is_folder = type(backend).__name__ == "Folder" and callable(location_fn) + if is_folder and location_fn is not None: + try: + root = Path(str(location_fn())).expanduser() + with API_folder_store(root) as db: + processed_pairs: set[tuple[str, str]] = set() + for alt_hash, king_hash in sorted(pairs): + if not alt_hash or not king_hash or alt_hash == king_hash: + continue + if (alt_hash, king_hash) in processed_pairs: + continue + # Hash-first store DB write; skips if either hash isn't in this store DB. + try: + db.set_relationship_by_hash(str(alt_hash), str(king_hash), "alt", bidirectional=False) + except Exception: + continue + processed_pairs.add((alt_hash, king_hash)) + except Exception: + pass + continue + + # Hydrus + if "hydrus" in backend_type or hasattr(backend, "_client"): + client: Any = getattr(backend, "_client", None) + # Do not fall back to a global/default Hydrus client here; relationships must not be cross-store. + if client is None or not hasattr(client, "set_relationship"): + continue + + def _hash_exists(hash_hex: str) -> bool: + try: + if not hasattr(client, "fetch_file_metadata"): + return False + payload = client.fetch_file_metadata( + hashes=[hash_hex], + include_service_keys_to_tags=False, + include_file_url=False, + include_duration=False, + include_size=False, + include_mime=False, + include_notes=False, + ) + meta = payload.get("metadata") if isinstance(payload, dict) else None + return bool(isinstance(meta, list) and meta) + except Exception: + return False + + processed_pairs: set[tuple[str, str]] = set() + for alt_hash, king_hash in sorted(pairs): + if not alt_hash or not king_hash or alt_hash == king_hash: + continue + if (alt_hash, king_hash) in processed_pairs: + continue + try: + alt_norm = str(alt_hash).strip().lower() + king_norm = str(king_hash).strip().lower() + if len(alt_norm) != 64 or len(king_norm) != 64: + continue + if not _hash_exists(alt_norm) or not _hash_exists(king_norm): + continue + client.set_relationship(alt_norm, king_norm, "alt") + processed_pairs.add((alt_hash, king_hash)) + except Exception: + pass + continue + + # Other backends: no-op + _ = backend_type + @staticmethod def _resolve_source( result: Any, @@ -310,7 +598,10 @@ class Add_File(Cmdlet): debug(f"Resolved pipe_path: {pipe_path_str}") if pipe_path_str.startswith("hydrus:"): file_hash = pipe_path_str.split(":", 1)[1] - media_path, success = Add_File._fetch_hydrus_path(file_hash, config) + store_name = getattr(pipe_obj, "store", None) + if not store_name and isinstance(pipe_obj.extra, dict): + store_name = pipe_obj.extra.get("store") + media_path, success = Add_File._fetch_hydrus_path(file_hash, config, store_name=str(store_name).strip() if store_name else None) return media_path, file_hash if success else None if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")): return pipe_path_str, None @@ -331,7 +622,10 @@ class Add_File(Cmdlet): hydrus_hash = hydrus_hash or pipe_obj.hash if hydrus_hash and hydrus_hash != "unknown": - media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config) + store_name = getattr(pipe_obj, "store", None) + if not store_name and 
isinstance(pipe_obj.extra, dict): + store_name = pipe_obj.extra.get("store") + media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config, store_name=str(store_name).strip() if store_name else None) return media_path, str(hydrus_hash) if success else None # Try from result (if it's a string path or URL) @@ -395,13 +689,32 @@ class Add_File(Cmdlet): return None, None @staticmethod - def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]: + def _fetch_hydrus_path( + file_hash: str, + config: Dict[str, Any], + store_name: Optional[str] = None, + ) -> Tuple[Optional[Path], bool]: """Fetch the physical path of a file from Hydrus using its hash.""" if not file_hash: return None, False try: - client = hydrus_wrapper.get_client(config) + client = None + if store_name: + # Store specified: do not fall back to a global/default Hydrus client. + try: + store = Store(config) + backend = store[str(store_name)] + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "get_file_path"): + client = candidate + except Exception: + client = None + if client is None: + log(f"❌ Hydrus client unavailable for store '{store_name}'", file=sys.stderr) + return None, False + else: + client = hydrus_wrapper.get_client(config) if not client: log("❌ Hydrus client not available", file=sys.stderr) return None, False @@ -630,7 +943,7 @@ class Add_File(Cmdlet): pass @staticmethod - def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> bool: + def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> Optional[List[Any]]: """Run search-store for a single hash so the final table/payload is consistent. Important: `add-file` is treated as an action command by the CLI, so the CLI only @@ -638,7 +951,7 @@ class Add_File(Cmdlet): this copies the resulting table into the display overlay (when this is the last stage) so the canonical store table is what the user sees and can select from. - Returns True if search-store ran successfully, else False. + Returns the emitted search-store payload items on success, else None. """ try: from cmdlet.search_store import CMDLET as search_store_cmdlet @@ -653,10 +966,11 @@ class Add_File(Cmdlet): ctx.set_stage_context(temp_ctx) try: code = search_store_cmdlet.run(None, args, config) + emitted_items = list(getattr(temp_ctx, "emits", []) or []) finally: ctx.set_stage_context(prev_ctx) if code != 0: - return False + return None # Promote the search-store result to a display overlay so the CLI prints it # for action commands like add-file. @@ -671,10 +985,10 @@ class Add_File(Cmdlet): except Exception: pass - return True + return emitted_items except Exception as exc: debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}") - return False + return None @staticmethod def _prepare_metadata( @@ -735,6 +1049,39 @@ class Add_File(Cmdlet): file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash) + # Relationships must not be stored as tags. + # If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships + # and strip them from the final tag list. 
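+        # Both tag spellings are recognised by _parse_relationship_tag_king_alts
+        # (hashes below are illustrative placeholders, not real values):
+        #   "relationship: <sha256>,<sha256>,..."                  -> first hash is the king
+        #   "relationship: hash(king)<sha256>,hash(alt)<sha256>"   -> legacy typed format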
+ relationship_tags = [ + t for t in merged_tags + if isinstance(t, str) and t.strip().lower().startswith("relationship:") + ] + if relationship_tags: + try: + if not isinstance(getattr(pipe_obj, "relationships", None), dict) or not pipe_obj.relationships: + king: Optional[str] = None + alts: List[str] = [] + for rel_tag in relationship_tags: + k, a = Add_File._parse_relationship_tag_king_alts(rel_tag) + if k and not king: + king = k + if a: + alts.extend(a) + if king: + seen_alt: set[str] = set() + alts = [h for h in alts if h and h != king and len(h) == 64 and not (h in seen_alt or seen_alt.add(h))] + payload: Dict[str, Any] = {"king": [king]} + if alts: + payload["alt"] = alts + pipe_obj.relationships = payload + except Exception: + pass + + merged_tags = [ + t for t in merged_tags + if not (isinstance(t, str) and t.strip().lower().startswith("relationship:")) + ] + # Persist back to PipeObject pipe_obj.tag = merged_tags if preferred_title and not pipe_obj.title: @@ -880,6 +1227,19 @@ class Add_File(Cmdlet): return None debug(f"[add-file] Starting soulseek download: {username} -> {filename}") + + # Read Soulseek login credentials from config (client credentials), separate from peer username. + try: + from config import get_soulseek_username, get_soulseek_password + client_user = get_soulseek_username(config) or "" + client_pass = get_soulseek_password(config) or "" + except Exception: + client_user = "" + client_pass = "" + + if not client_user or not client_pass: + debug("[add-file] ERROR: Soulseek credentials missing (set [provider=soulseek] username/password in config.conf)") + return None # Determine output directory (prefer downloads folder in config) output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads") @@ -900,6 +1260,8 @@ class Add_File(Cmdlet): username=username, filename=filename, output_dir=output_dir, + client_username=client_user, + client_password=client_pass, timeout=1200 # 20 minutes ) ) @@ -937,9 +1299,15 @@ class Add_File(Cmdlet): f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None) if f_hash: try: - client = hydrus_wrapper.get_client(config) - if client: - client.associate_url(f_hash, hoster_url) + # Only associate when we can target an explicit Hydrus store backend. + # Do not fall back to a global/default Hydrus client. + store_name = getattr(pipe_obj, "store", None) + if store_name: + store = Store(config) + backend = store[str(store_name)] + client = getattr(backend, "_client", None) + if client is not None and hasattr(client, "associate_url"): + client.associate_url(str(f_hash), hoster_url) except Exception: pass @@ -984,6 +1352,7 @@ class Add_File(Cmdlet): delete_after: bool, *, collect_payloads: Optional[List[Dict[str, Any]]] = None, + collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None, suppress_last_stage_overlay: bool = False, auto_search_store: bool = True, ) -> int: @@ -1018,6 +1387,21 @@ class Add_File(Cmdlet): # Prepare metadata from pipe_obj and sidecars tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config) + + # Collect relationship pairs for post-ingest DB/API persistence. 
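+        # The dict maps backend name -> set of (alt_hash, king_hash) pairs; it is persisted
+        # after ingest by Add_File._apply_pending_relationships().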
+ if collect_relationship_pairs is not None: + rels = Add_File._get_relationships(result, pipe_obj) + if isinstance(rels, dict) and rels: + king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels) + if king_hash and alt_hashes: + bucket = collect_relationship_pairs.setdefault(str(backend_name), set()) + for alt_hash in alt_hashes: + if alt_hash and alt_hash != king_hash: + bucket.add((alt_hash, king_hash)) + + # Relationships must never be stored as tags. + if isinstance(tags, list) and tags: + tags = [t for t in tags if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))] # Call backend's add_file with full metadata # Backend returns hash as identifier @@ -1030,13 +1414,16 @@ class Add_File(Cmdlet): log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr) stored_path: Optional[str] = None + # IMPORTANT: avoid calling get_file() for remote backends. + # For Hydrus, get_file() returns a browser URL (often with an access key) and should + # only be invoked by explicit user commands (e.g. get-file). try: - maybe_path = backend.get_file(file_identifier) - if isinstance(maybe_path, Path): - stored_path = str(maybe_path) - elif isinstance(maybe_path, str) and maybe_path: - # Some backends may return a browser URL - stored_path = maybe_path + if type(backend).__name__ == "Folder": + maybe_path = backend.get_file(file_identifier) + if isinstance(maybe_path, Path): + stored_path = str(maybe_path) + elif isinstance(maybe_path, str) and maybe_path: + stored_path = maybe_path except Exception: stored_path = None @@ -1119,12 +1506,16 @@ class Add_File(Cmdlet): # Show the add-file summary (overlay only) but let search-store provide the downstream payload. Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False) - ok = Add_File._try_emit_search_store_by_hash( + refreshed_items = Add_File._try_emit_search_store_by_hash( store=backend_name, hash_value=resolved_hash, config=config, ) - if not ok: + if refreshed_items: + # Re-emit the canonical store rows so downstream stages receive them. + for emitted in refreshed_items: + ctx.emit(emitted) + else: # Fall back to emitting the add-file payload so downstream stages still receive an item. 
ctx.emit(payload) else: diff --git a/cmdlet/add_relationship.py b/cmdlet/add_relationship.py index 13e71dd..fbaa51c 100644 --- a/cmdlet/add_relationship.py +++ b/cmdlet/add_relationship.py @@ -3,18 +3,17 @@ from __future__ import annotations from typing import Any, Dict, Optional, Sequence -import json import re from pathlib import Path import sys from SYS.logger import log -import models import pipeline as ctx from API import HydrusNetwork as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field -from API.folder import read_sidecar, find_sidecar +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_result_input, should_show_help, get_field +from API.folder import read_sidecar, find_sidecar, API_folder_store +from Store import Store CMDLET = Cmdlet( @@ -23,13 +22,19 @@ CMDLET = Cmdlet( usage="@1-3 | add-relationship -king @4 OR add-relationship -path OR @1,@2,@3 | add-relationship", arg=[ CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."), + SharedArgs.STORE, + SharedArgs.HASH, CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"), + CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt ,)"), CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"), ], detail=[ "- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)", "- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4", - "- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king),hash(alt)...')", + "- Mode 2b: Use -king and -alt to select both sides from the last table: add-relationship -king @1 -alt @3-5", + "- Mode 3: Read relationships from sidecar tags:", + " - New format: 'relationship: ,,' (first hash is king)", + " - Legacy: 'relationship: hash(king),hash(alt)...'", "- Supports three relationship types: king (primary), alt (alternative), related (other versions)", "- When using -king, all piped items become the specified relationship type to the king", ], @@ -47,39 +52,236 @@ def _normalise_hash_hex(value: Optional[str]) -> Optional[str]: def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]: - """Parse relationship tag like 'relationship: hash(king),hash(alt)'. - + """Parse relationship tags. + + Supported formats: + - New: relationship: ,, + - Old: relationship: hash(king),hash(alt)... + Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...} """ result: Dict[str, list[str]] = {} if not isinstance(tag_value, str): return result - # Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets) - pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})' + # Match patterns like hash(king)HASH or hash(type) + pattern = r'hash\((\w+)\)?' 
matches = re.findall(pattern, tag_value) - - for rel_type, hash_value in matches: - normalized = _normalise_hash_hex(hash_value) - if normalized: - if rel_type not in result: - result[rel_type] = [] - result[rel_type].append(normalized) - + + if matches: + for rel_type, hash_value in matches: + normalized = _normalise_hash_hex(hash_value) + if normalized: + if rel_type not in result: + result[rel_type] = [] + result[rel_type].append(normalized) + return result + + # New format: extract hashes, first is king + hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", tag_value) + hashes = [h.strip().lower() for h in hashes if isinstance(h, str)] + if not hashes: + return result + king = _normalise_hash_hex(hashes[0]) + if not king: + return result + result["king"] = [king] + alts: list[str] = [] + for h in hashes[1:]: + normalized = _normalise_hash_hex(h) + if normalized and normalized != king: + alts.append(normalized) + if alts: + result["alt"] = alts return result +def _apply_relationships_from_tags( + relationship_tags: Sequence[str], + *, + hydrus_client: Any, + use_local_storage: bool, + local_storage_path: Optional[Path], + config: Dict[str, Any], +) -> int: + """Persist relationship tags into Hydrus or local DB. + + Local DB semantics: + - Treat the first hash (king) as the king. + - Store directional alt -> king relationships (no reverse edge). + """ + rel_tags = [t for t in relationship_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:")] + if not rel_tags: + return 0 + + # Prefer Hydrus if available (hash-based relationships map naturally). + if hydrus_client is not None and hasattr(hydrus_client, "set_relationship"): + processed: set[tuple[str, str, str]] = set() + for tag in rel_tags: + rels = _extract_relationships_from_tag(tag) + king = (rels.get("king") or [None])[0] + if not king: + continue + king_norm = _normalise_hash_hex(king) + if not king_norm: + continue + + for rel_type in ("alt", "related"): + for other in rels.get(rel_type, []) or []: + other_norm = _normalise_hash_hex(other) + if not other_norm or other_norm == king_norm: + continue + key = (other_norm, king_norm, rel_type) + if key in processed: + continue + try: + hydrus_client.set_relationship(other_norm, king_norm, rel_type) + processed.add(key) + except Exception: + pass + return 0 + + # Local DB fallback (store/hash-first) + if use_local_storage and local_storage_path is not None: + try: + with API_folder_store(local_storage_path) as db: + processed_pairs: set[tuple[str, str]] = set() + for tag in rel_tags: + rels = _extract_relationships_from_tag(tag) + king = (rels.get("king") or [None])[0] + if not king: + continue + king_norm = _normalise_hash_hex(king) + if not king_norm: + continue + + # For local DB we treat all non-king hashes as alts. + alt_hashes: list[str] = [] + for bucket in ("alt", "related"): + alt_hashes.extend([h for h in (rels.get(bucket) or []) if isinstance(h, str)]) + + for alt in alt_hashes: + alt_norm = _normalise_hash_hex(alt) + if not alt_norm or alt_norm == king_norm: + continue + if (alt_norm, king_norm) in processed_pairs: + continue + db.set_relationship_by_hash(alt_norm, king_norm, "alt", bidirectional=False) + processed_pairs.add((alt_norm, king_norm)) + except Exception: + return 1 + return 0 + + return 0 + + +def _parse_at_selection(token: str) -> Optional[list[int]]: + """Parse standard @ selection syntax into a list of 0-based indices. 
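+
+    For example:
+        >>> _parse_at_selection("@3-5,8")
+        [2, 3, 4, 7]
+        >>> _parse_at_selection("@*")
+        []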
+ + Supports: @2, @2-5, @{1,3,5}, @3,5,7, @3-6,8, @* + """ + if not isinstance(token, str): + return None + t = token.strip() + if not t.startswith('@'): + return None + if t == "@*": + return [] # special sentinel: caller interprets as "all" + + selector = t[1:].strip() + if not selector: + return None + if selector.startswith("{") and selector.endswith("}"): + selector = selector[1:-1].strip() + + parts = [p.strip() for p in selector.split(",") if p.strip()] + if not parts: + return None + + indices_1based: set[int] = set() + for part in parts: + try: + if "-" in part: + start_s, end_s = part.split("-", 1) + start = int(start_s.strip()) + end = int(end_s.strip()) + if start <= 0 or end <= 0 or start > end: + return None + for i in range(start, end + 1): + indices_1based.add(i) + else: + num = int(part) + if num <= 0: + return None + indices_1based.add(num) + except Exception: + return None + + return sorted(i - 1 for i in indices_1based) + + +def _resolve_items_from_at(token: str) -> Optional[list[Any]]: + """Resolve @ selection token into actual items from the current result context.""" + items = ctx.get_last_result_items() + if not items: + return None + parsed = _parse_at_selection(token) + if parsed is None: + return None + if token.strip() == "@*": + return list(items) + selected: list[Any] = [] + for idx in parsed: + if 0 <= idx < len(items): + selected.append(items[idx]) + return selected + + +def _extract_hash_and_store(item: Any) -> tuple[Optional[str], Optional[str]]: + """Extract (hash_hex, store) from a result item (dict/object).""" + try: + h = get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash") + s = get_field(item, "store") + + hash_norm = _normalise_hash_hex(str(h) if h is not None else None) + + store_norm: Optional[str] + if s is None: + store_norm = None + else: + store_norm = str(s).strip() + if not store_norm: + store_norm = None + + return hash_norm, store_norm + except Exception: + return None, None + + +def _hydrus_hash_exists(hydrus_client: Any, hash_hex: str) -> bool: + """Best-effort check whether a hash exists in the connected Hydrus backend.""" + try: + if hydrus_client is None or not hasattr(hydrus_client, "fetch_file_metadata"): + return False + payload = hydrus_client.fetch_file_metadata( + hashes=[hash_hex], + include_service_keys_to_tags=False, + include_file_url=False, + include_duration=False, + include_size=False, + include_mime=False, + include_notes=False, + ) + meta = payload.get("metadata") if isinstance(payload, dict) else None + return bool(isinstance(meta, list) and meta) + except Exception: + return False + + def _resolve_king_reference(king_arg: str) -> Optional[str]: - """Resolve a king reference like '@4' to its actual hash or path. - - Supports: - - Direct hash: '0123456789abcdef...' (64 chars) - - Selection reference: '@4' (resolves from pipeline context) - - Returns: - - For Hydrus items: normalized hash - - For local storage items: file path - - None if not found + """Resolve a king reference like '@4' to its actual hash. + + Store/hash mode intentionally avoids file-path dependency. 
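+    Accepts a direct 64-hex SHA256, or an @ selection that resolves to exactly one item.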
""" if not king_arg: return None @@ -89,53 +291,30 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]: if normalized: return normalized - # Try to resolve as @N selection from pipeline context + # Try to resolve as @ selection from pipeline context if king_arg.startswith('@'): - try: - # Get the result items from the pipeline context - from pipeline import get_last_result_items - items = get_last_result_items() - if not items: - log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr) - return None - - # Parse @N to get the index (1-based) - index_str = king_arg[1:] # Remove '@' - index = int(index_str) - 1 # Convert to 0-based - - if 0 <= index < len(items): - item = items[index] - - # Try to extract hash from the item (could be dict or object) - item_hash = ( - get_field(item, 'hash_hex') - or get_field(item, 'hash') - or get_field(item, 'file_hash') - ) - - if item_hash: - normalized = _normalise_hash_hex(item_hash) - if normalized: - return normalized - - # If no hash, try to get file path (for local storage) - file_path = ( - get_field(item, 'file_path') - or get_field(item, 'path') - or get_field(item, 'target') - ) - - if file_path: - return str(file_path) - - log(f"Item {king_arg} has no hash or path information", file=sys.stderr) - return None - else: - log(f"Index {king_arg} out of range", file=sys.stderr) - return None - except (ValueError, IndexError) as e: - log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr) + selected = _resolve_items_from_at(king_arg) + if not selected: + log(f"Cannot resolve {king_arg}: no selection context", file=sys.stderr) return None + if len(selected) != 1: + log(f"{king_arg} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr) + return None + + item = selected[0] + item_hash = ( + get_field(item, 'hash_hex') + or get_field(item, 'hash') + or get_field(item, 'file_hash') + ) + + if item_hash: + normalized = _normalise_hash_hex(str(item_hash)) + if normalized: + return normalized + + log(f"Item {king_arg} has no hash information", file=sys.stderr) + return None return None @@ -208,7 +387,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Parse arguments using CMDLET spec parsed = parse_cmdlet_args(_args, CMDLET) arg_path: Optional[Path] = None + override_store = parsed.get("store") + override_hash = parsed.get("hash") king_arg = parsed.get("king") + alt_arg = parsed.get("alt") rel_type = parsed.get("type", "alt") raw_path = parsed.get("path") @@ -221,6 +403,45 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Handle @N selection which creates a list # Use normalize_result_input to handle both single items and lists items_to_process = normalize_result_input(result) + + # Allow selecting alt items directly from the last table via -alt @... 
+ # This enables: add-relationship -king @1 -alt @3-5 + if alt_arg: + alt_text = str(alt_arg).strip() + resolved_alt_items: list[Any] = [] + if alt_text.startswith('@'): + selected = _resolve_items_from_at(alt_text) + if not selected: + log(f"Failed to resolve -alt {alt_text}: no selection context", file=sys.stderr) + return 1 + resolved_alt_items = selected + else: + # Treat as comma/semicolon-separated list of hashes + parts = [p.strip() for p in alt_text.replace(";", ",").split(",") if p.strip()] + hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h] + if not hashes: + log("Invalid -alt value (expected @ selection or 64-hex sha256 hash list)", file=sys.stderr) + return 1 + if not override_store: + log("-store is required when using -alt with a raw hash list", file=sys.stderr) + return 1 + resolved_alt_items = [{"hash": h, "store": str(override_store)} for h in hashes] + items_to_process = normalize_result_input(resolved_alt_items) + + # Allow explicit -hash operation (store/hash-first) + if (not items_to_process) and override_hash: + # Support comma-separated hashes + raw = str(override_hash) + parts = [p.strip() for p in raw.replace(";", ",").split(",")] + hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h] + if not hashes: + log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr) + return 1 + # Use the selected/override store; required in this mode + if not override_store: + log("-store is required when using -hash without piped items", file=sys.stderr) + return 1 + items_to_process = [{"hash": h, "store": str(override_store)} for h in hashes] if not items_to_process and not arg_path: log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr) @@ -230,42 +451,242 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: if not items_to_process and arg_path: items_to_process = [{"file_path": arg_path}] - # Import local storage utilities - from API.folder import LocalLibrarySearchOptimizer - from config import get_local_storage_path - - local_storage_path = get_local_storage_path(config) if config else None - - # Check if any items have Hydrus hashes (file_hash or hash_hex fields) - has_hydrus_hashes = any( - (isinstance(item, dict) and (item.get('hash_hex') or item.get('hash'))) - or (hasattr(item, 'hash_hex') or hasattr(item, 'hash')) - for item in items_to_process - ) - - # Only try to initialize Hydrus if we actually have Hydrus hashes to work with + # Resolve the king reference once (if provided) + king_hash: Optional[str] = None + king_store: Optional[str] = None + if king_arg: + king_text = str(king_arg).strip() + if king_text.startswith('@'): + selected = _resolve_items_from_at(king_text) + if not selected: + log(f"Cannot resolve {king_text}: no selection context", file=sys.stderr) + return 1 + if len(selected) != 1: + log(f"{king_text} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr) + return 1 + king_hash, king_store = _extract_hash_and_store(selected[0]) + if not king_hash: + log(f"Item {king_text} has no hash information", file=sys.stderr) + return 1 + else: + king_hash = _resolve_king_reference(king_text) + if not king_hash: + log(f"Failed to resolve king argument: {king_text}", file=sys.stderr) + return 1 + + # Decide target store: override_store > (king store + piped item stores) (must be consistent) + store_name: Optional[str] = str(override_store).strip() if override_store else None + if not store_name: + stores = set() + if king_store: + 
stores.add(str(king_store)) + for item in items_to_process: + s = get_field(item, "store") + if s: + stores.add(str(s)) + if len(stores) == 1: + store_name = next(iter(stores)) + elif len(stores) > 1: + log("Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store", file=sys.stderr) + return 1 + + # Enforce same-store relationships when store context is available. + if king_store and store_name and str(king_store) != str(store_name): + log(f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'", file=sys.stderr) + return 1 + if store_name: + for item in items_to_process: + s = get_field(item, "store") + if s and str(s) != str(store_name): + log(f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'", file=sys.stderr) + return 1 + + # Resolve backend for store/hash operations + backend = None + is_folder_store = False + store_root: Optional[Path] = None + if store_name: + try: + store = Store(config) + backend = store[str(store_name)] + loc = getattr(backend, "location", None) + if callable(loc): + is_folder_store = True + store_root = Path(str(loc())) + except Exception: + backend = None + is_folder_store = False + store_root = None + + # Select Hydrus client: + # - If a store is specified and maps to a HydrusNetwork backend, use that backend's client. + # - If no store is specified, use the default Hydrus client. + # NOTE: When a store is specified, we do not fall back to a global/default Hydrus client. hydrus_client = None - if has_hydrus_hashes: + if store_name and (not is_folder_store) and backend is not None: + try: + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "set_relationship"): + hydrus_client = candidate + except Exception: + hydrus_client = None + elif not store_name: try: hydrus_client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr) - - # Use local storage if it's available and either Hydrus is not available or items are local files - use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists())) + except Exception: + hydrus_client = None - # Resolve the king reference once (if provided) - king_hash = None - if king_arg: - # Resolve the king reference (could be @4 or a direct hash) - king_hash = _resolve_king_reference(king_arg) - if not king_hash: - log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr) - return 1 + # Sidecar/tag import fallback DB root (legacy): if a folder store is selected, use it; + # otherwise fall back to configured local storage path. + from config import get_local_storage_path + + local_storage_root: Optional[Path] = None + if store_root is not None: + local_storage_root = store_root + else: + try: + p = get_local_storage_path(config) if config else None + local_storage_root = Path(p) if p else None + except Exception: + local_storage_root = None + + use_local_storage = local_storage_root is not None + + if king_hash: log(f"Using king hash: {king_hash}", file=sys.stderr) - # Process each item in the list - for item_idx, item in enumerate(items_to_process): + # If -path is provided, try reading relationship tags from its sidecar and persisting them. 
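+    # Only the sidecar's "relationship:" tags are applied here; any other sidecar tags are ignored.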
+ if arg_path is not None and arg_path.exists() and arg_path.is_file(): + try: + sidecar_path = find_sidecar(arg_path) + if sidecar_path is not None and sidecar_path.exists(): + _, tags, _ = read_sidecar(sidecar_path) + relationship_tags = [t for t in (tags or []) if isinstance(t, str) and t.lower().startswith("relationship:")] + if relationship_tags: + code = _apply_relationships_from_tags( + relationship_tags, + hydrus_client=hydrus_client, + use_local_storage=use_local_storage, + local_storage_path=local_storage_root, + config=config, + ) + return 0 if code == 0 else 1 + except Exception: + pass + + # If piped items include relationship tags, persist them (one pass) then exit. + try: + rel_tags_from_pipe: list[str] = [] + for item in items_to_process: + tags_val = None + if isinstance(item, dict): + tags_val = item.get("tag") or item.get("tags") + else: + tags_val = getattr(item, "tag", None) + if isinstance(tags_val, list): + rel_tags_from_pipe.extend([t for t in tags_val if isinstance(t, str) and t.lower().startswith("relationship:")]) + elif isinstance(tags_val, str) and tags_val.lower().startswith("relationship:"): + rel_tags_from_pipe.append(tags_val) + + if rel_tags_from_pipe: + code = _apply_relationships_from_tags( + rel_tags_from_pipe, + hydrus_client=hydrus_client, + use_local_storage=use_local_storage, + local_storage_path=local_storage_root, + config=config, + ) + return 0 if code == 0 else 1 + except Exception: + pass + + # STORE/HASH MODE (preferred): use -store and hashes; do not require file paths. + if store_name and is_folder_store and store_root is not None: + try: + with API_folder_store(store_root) as db: + # Mode 1: no explicit king -> first is king, rest are alts + if not king_hash: + first_hash = None + for item in items_to_process: + h, item_store = _extract_hash_and_store(item) + if item_store and store_name and str(item_store) != str(store_name): + log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr) + return 1 + if not h: + continue + if not first_hash: + first_hash = h + continue + # directional alt -> king by default for local DB + bidirectional = str(rel_type).lower() != "alt" + db.set_relationship_by_hash(h, first_hash, str(rel_type), bidirectional=bidirectional) + return 0 + + # Mode 2: explicit king + for item in items_to_process: + h, item_store = _extract_hash_and_store(item) + if item_store and store_name and str(item_store) != str(store_name): + log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr) + return 1 + if not h or h == king_hash: + continue + bidirectional = str(rel_type).lower() != "alt" + db.set_relationship_by_hash(h, king_hash, str(rel_type), bidirectional=bidirectional) + return 0 + except Exception as exc: + log(f"Failed to set store relationships: {exc}", file=sys.stderr) + return 1 + + if store_name and (not is_folder_store): + # Hydrus store/hash mode + if hydrus_client is None: + log("Hydrus client unavailable for this store", file=sys.stderr) + return 1 + + # Verify hashes exist in this Hydrus backend to prevent cross-store edges. 
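+        # _hydrus_hash_exists() is a best-effort metadata lookup; a hash missing from this
+        # backend aborts the command rather than silently creating a cross-store edge.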
+ if king_hash and (not _hydrus_hash_exists(hydrus_client, king_hash)): + log(f"Cross-store relationship blocked: king hash not found in store '{store_name}'", file=sys.stderr) + return 1 + + # Mode 1: first is king + if not king_hash: + first_hash = None + for item in items_to_process: + h, item_store = _extract_hash_and_store(item) + if item_store and store_name and str(item_store) != str(store_name): + log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr) + return 1 + if not h: + continue + if not first_hash: + first_hash = h + if not _hydrus_hash_exists(hydrus_client, first_hash): + log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr) + return 1 + continue + if h != first_hash: + if not _hydrus_hash_exists(hydrus_client, h): + log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr) + return 1 + hydrus_client.set_relationship(h, first_hash, str(rel_type)) + return 0 + + # Mode 2: explicit king + for item in items_to_process: + h, item_store = _extract_hash_and_store(item) + if item_store and store_name and str(item_store) != str(store_name): + log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr) + return 1 + if not h or h == king_hash: + continue + if not _hydrus_hash_exists(hydrus_client, h): + log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr) + return 1 + hydrus_client.set_relationship(h, king_hash, str(rel_type)) + return 0 + + # Process each item in the list (legacy path-based mode) + for item in items_to_process: # Extract hash and path from current item file_hash = None file_path_from_result = None @@ -277,9 +698,83 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None) file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None) + # Legacy LOCAL STORAGE MODE: Handle relationships for local files + # (kept for -path sidecar workflows; store/hash mode above is preferred) + from API.folder import LocalLibrarySearchOptimizer + from config import get_local_storage_path + local_storage_path = get_local_storage_path(config) if config else None + use_local_storage = bool(local_storage_path) + local_storage_root: Optional[Path] = None + if local_storage_path: + try: + local_storage_root = Path(local_storage_path) + except Exception: + local_storage_root = None + + if use_local_storage and file_path_from_result: + try: + file_path_obj = Path(str(file_path_from_result)) + except Exception as exc: + log(f"Local storage error: {exc}", file=sys.stderr) + return 1 + + if not file_path_obj.exists(): + # Not a local file; fall through to Hydrus if possible. 
+ file_path_obj = None + + if file_path_obj is not None: + try: + if local_storage_root is None: + log("Local storage path unavailable", file=sys.stderr) + return 1 + + with LocalLibrarySearchOptimizer(local_storage_root) as opt: + if opt.db is None: + log("Local storage DB unavailable", file=sys.stderr) + return 1 + + if king_hash: + normalized_king = _normalise_hash_hex(str(king_hash)) + if not normalized_king: + log(f"King hash invalid: {king_hash}", file=sys.stderr) + return 1 + king_file_path = opt.db.search_hash(normalized_king) + if not king_file_path: + log(f"King hash not found in local DB: {king_hash}", file=sys.stderr) + return 1 + + bidirectional = str(rel_type).lower() != "alt" + opt.db.set_relationship(file_path_obj, king_file_path, rel_type, bidirectional=bidirectional) + log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr) + _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config) + else: + # Original behavior: first becomes king, rest become alts + try: + king_path = ctx.load_value("relationship_king_path") + except Exception: + king_path = None + + if not king_path: + try: + ctx.store_value("relationship_king_path", str(file_path_obj)) + log(f"Established king file: {file_path_obj.name}", file=sys.stderr) + continue + except Exception: + pass + + if king_path and king_path != str(file_path_obj): + bidirectional = str(rel_type).lower() != "alt" + opt.db.set_relationship(file_path_obj, Path(king_path), rel_type, bidirectional=bidirectional) + log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr) + _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config) + except Exception as exc: + log(f"Local storage error: {exc}", file=sys.stderr) + return 1 + continue + # PIPELINE MODE with Hydrus: Track relationships using hash if file_hash and hydrus_client: - file_hash = _normalise_hash_hex(file_hash) + file_hash = _normalise_hash_hex(str(file_hash) if file_hash is not None else None) if not file_hash: log("Invalid file hash format", file=sys.stderr) return 1 @@ -292,7 +787,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}", file=sys.stderr ) - _refresh_relationship_view_if_current(file_hash, file_path_from_result, king_hash, config) + _refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, king_hash, config) except Exception as exc: log(f"Failed to set relationship: {exc}", file=sys.stderr) return 1 @@ -320,58 +815,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}", file=sys.stderr ) - _refresh_relationship_view_if_current(file_hash, file_path_from_result, existing_king, config) + _refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, existing_king, config) except Exception as exc: log(f"Failed to set relationship: {exc}", file=sys.stderr) return 1 - # LOCAL STORAGE MODE: Handle relationships for local files - elif use_local_storage and file_path_from_result: - try: - file_path_obj = Path(str(file_path_from_result)) - - if not file_path_obj.exists(): - log(f"File not found: {file_path_obj}", file=sys.stderr) - return 1 - - if king_hash: - # king_hash is a file path from 
_resolve_king_reference (or a Hydrus hash) - king_file_path = Path(str(king_hash)) if king_hash else None - if king_file_path and king_file_path.exists(): - with LocalLibrarySearchOptimizer(local_storage_path) as db: - db.set_relationship(file_path_obj, king_file_path, rel_type) - log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr) - _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config) - else: - log(f"King file not found or invalid: {king_hash}", file=sys.stderr) - return 1 - else: - # Original behavior: first becomes king, rest become alts - try: - king_path = ctx.load_value("relationship_king_path") - except Exception: - king_path = None - - if not king_path: - try: - ctx.store_value("relationship_king_path", str(file_path_obj)) - log(f"Established king file: {file_path_obj.name}", file=sys.stderr) - continue # Move to next item - except Exception: - pass - - if king_path and king_path != str(file_path_obj): - try: - with LocalLibrarySearchOptimizer(local_storage_path) as db: - db.set_relationship(file_path_obj, Path(king_path), rel_type) - log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr) - _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config) - except Exception as exc: - log(f"Failed to set relationship: {exc}", file=sys.stderr) - return 1 - except Exception as exc: - log(f"Local storage error: {exc}", file=sys.stderr) - return 1 + # If we get here, we didn't have a usable local path and Hydrus isn't available/usable. return 0 @@ -395,12 +844,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Build Hydrus client try: - client = hydrus_wrapper.get_client(config) + hydrus_client = hydrus_wrapper.get_client(config) except Exception as exc: log(f"Hydrus client unavailable: {exc}", file=sys.stderr) return 1 - if client is None: + if hydrus_client is None: log("Hydrus client unavailable", file=sys.stderr) return 1 diff --git a/cmdlet/add_tag.py b/cmdlet/add_tag.py index b49e584..91d477f 100644 --- a/cmdlet/add_tag.py +++ b/cmdlet/add_tag.py @@ -272,8 +272,16 @@ class Add_Tag(Cmdlet): # Parse arguments parsed = parse_cmdlet_args(args, self) - # Check for --all flag - include_temp = parsed.get("all", False) + # If add-tag is in the middle of a pipeline (has downstream stages), default to + # including temp files. This enables common flows like: + # @N | download-media | add-tag ... | add-file ... 
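+        # The implicit default only applies when neither --all nor an explicit -store was given.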
+ store_override = parsed.get("store") + stage_ctx = ctx.get_stage_context() + has_downstream = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)) + + include_temp = bool(parsed.get("all", False)) + if has_downstream and not include_temp and not store_override: + include_temp = True # Normalize input to list results = normalize_result_input(result) @@ -352,8 +360,6 @@ class Add_Tag(Cmdlet): total_added = 0 total_modified = 0 - store_override = parsed.get("store") - store_registry = Store(config) for res in results: diff --git a/cmdlet/check_file_status.py b/cmdlet/check_file_status.py index aeb36f0..ffacac4 100644 --- a/cmdlet/check_file_status.py +++ b/cmdlet/check_file_status.py @@ -8,14 +8,16 @@ from SYS.logger import log from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help +from Store import Store CMDLET = Cmdlet( name="check-file-status", summary="Check if a file is active, deleted, or corrupted in Hydrus.", - usage="check-file-status [-hash ]", + usage="check-file-status [-hash ] [-store ]", arg=[ SharedArgs.HASH, + SharedArgs.STORE, ], detail=[ "- Shows whether file is active in Hydrus or marked as deleted", @@ -34,6 +36,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Parse arguments override_hash: str | None = None + override_store: str | None = None i = 0 while i < len(args): token = args[i] @@ -42,24 +45,57 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: override_hash = str(args[i + 1]).strip() i += 2 continue + if low in {"-store", "--store", "store"} and i + 1 < len(args): + override_store = str(args[i + 1]).strip() + i += 2 + continue i += 1 - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) + store_name: str | None = override_store + if not store_name: + if isinstance(result, dict): + store_name = str(result.get("store") or "").strip() or None + else: + store_name = str(getattr(result, "store", "") or "").strip() or None + + if override_hash: + hash_hex = normalize_hash(override_hash) + else: + if isinstance(result, dict): + hash_hex = normalize_hash(result.get("hash") or result.get("hash_hex")) + else: + hash_hex = normalize_hash(getattr(result, "hash", None) or getattr(result, "hash_hex", None)) if not hash_hex: log("No hash provided and no result selected", file=sys.stderr) return 1 try: - client = hydrus_wrapper.get_client(config) + client = None + if store_name: + # Store specified: do not fall back to a global/default Hydrus client. 
+ try: + store = Store(config) + backend = store[str(store_name)] + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "fetch_file_metadata"): + client = candidate + except Exception: + client = None + + if client is None: + log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr) + return 1 + else: + client = hydrus_wrapper.get_client(config) + + if client is None: + log("Hydrus client unavailable", file=sys.stderr) + return 1 except Exception as exc: log(f"Hydrus client unavailable: {exc}", file=sys.stderr) return 1 - if client is None: - log("Hydrus client unavailable", file=sys.stderr) - return 1 - try: result_data = client.fetch_file_metadata(hashes=[hash_hex]) if not result_data.get("metadata"): diff --git a/cmdlet/delete_file.py b/cmdlet/delete_file.py index 203c5dc..22ebfd0 100644 --- a/cmdlet/delete_file.py +++ b/cmdlet/delete_file.py @@ -7,6 +7,7 @@ from pathlib import Path from SYS.logger import debug, log from Store.Folder import Folder +from Store import Store from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help from API import HydrusNetwork as hydrus_wrapper import pipeline as ctx @@ -119,30 +120,48 @@ class Delete_File(Cmdlet): should_try_hydrus = False if should_try_hydrus and hash_hex: - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - if not local_deleted: - log(f"Hydrus client unavailable: {exc}", file=sys.stderr) - return False + client = None + if store: + # Store specified: do not fall back to a global/default Hydrus client. + try: + registry = Store(config) + backend = registry[str(store)] + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "_post"): + client = candidate + except Exception as exc: + if not local_deleted: + log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr) + return False + if client is None: + if not local_deleted: + log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr) + return False else: + # No store context; use default Hydrus client. + try: + client = hydrus_wrapper.get_client(config) + except Exception as exc: + if not local_deleted: + log(f"Hydrus client unavailable: {exc}", file=sys.stderr) + return False if client is None: if not local_deleted: log("Hydrus client unavailable", file=sys.stderr) return False - else: - payload: Dict[str, Any] = {"hashes": [hash_hex]} - if reason: - payload["reason"] = reason - try: - client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined] - hydrus_deleted = True - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr) - except Exception as exc: - # If it's not in Hydrus (e.g. 404 or similar), that's fine - if not local_deleted: - return False + + payload: Dict[str, Any] = {"hashes": [hash_hex]} + if reason: + payload["reason"] = reason + try: + client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined] + hydrus_deleted = True + preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') + debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr) + except Exception: + # If it's not in Hydrus (e.g. 
404 or similar), that's fine + if not local_deleted: + return False if hydrus_deleted and hash_hex: preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') diff --git a/cmdlet/delete_relationship.py b/cmdlet/delete_relationship.py index 9c587b6..80e0f53 100644 --- a/cmdlet/delete_relationship.py +++ b/cmdlet/delete_relationship.py @@ -10,11 +10,65 @@ import sys from SYS.logger import log import pipeline as ctx -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help -from API.folder import LocalLibrarySearchOptimizer +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_hash, normalize_result_input, get_field, should_show_help +from API.folder import API_folder_store +from Store import Store from config import get_local_storage_path +def _extract_hash(item: Any) -> Optional[str]: + h = get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash") + return normalize_hash(str(h)) if h else None + + +def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: Dict[str, Any]) -> None: + conn = db.connection + if conn is None: + raise RuntimeError("Store DB connection is not initialized") + cursor = conn.cursor() + cursor.execute( + """ + INSERT INTO metadata (hash, relationships) + VALUES (?, ?) + ON CONFLICT(hash) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + """, + (file_hash, json.dumps(relationships) if relationships else "{}"), + ) + + +def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str) -> None: + meta = db.get_metadata(dst_hash) or {} + rels = meta.get("relationships") if isinstance(meta, dict) else None + if not isinstance(rels, dict) or not rels: + return + + key_to_edit: Optional[str] = None + for k in list(rels.keys()): + if str(k).lower() == str(rel_type).lower(): + key_to_edit = str(k) + break + if not key_to_edit: + return + + bucket = rels.get(key_to_edit) + if not isinstance(bucket, list) or not bucket: + return + + new_bucket = [h for h in bucket if str(h).lower() != str(src_hash).lower()] + if new_bucket: + rels[key_to_edit] = new_bucket + else: + try: + del rels[key_to_edit] + except Exception: + rels[key_to_edit] = [] + + _upsert_relationships(db, dst_hash, rels) + + def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None: """If the current subject matches the target, refresh relationships via get-relationship.""" try: @@ -84,109 +138,223 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: parsed_args = parse_cmdlet_args(args, CMDLET) delete_all_flag = parsed_args.get("all", False) rel_type_filter = parsed_args.get("type") + override_store = parsed_args.get("store") + override_hash = parsed_args.get("hash") + raw_path = parsed_args.get("path") + # Normalize input + results = normalize_result_input(result) + + # Allow store/hash-first usage when no pipeline items were provided + if (not results) and override_hash: + raw = str(override_hash) + parts = [p.strip() for p in raw.replace(";", ",").split(",") if p.strip()] + hashes = [h for h in (normalize_hash(p) for p in parts) if h] + if not hashes: + log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr) + return 1 + if not override_store: + log("-store is required when using -hash without piped items", file=sys.stderr) + return 1 + results 
= [{"hash": h, "store": str(override_store)} for h in hashes] + + if not results: + # Legacy -path mode below may still apply + if raw_path: + results = [{"file_path": raw_path}] + else: + log("No results to process", file=sys.stderr) + return 1 + + # Decide store (for same-store enforcement + folder-store DB routing) + store_name: Optional[str] = str(override_store).strip() if override_store else None + if not store_name: + stores = {str(get_field(r, "store")) for r in results if get_field(r, "store")} + if len(stores) == 1: + store_name = next(iter(stores)) + elif len(stores) > 1: + log("Multiple stores detected in pipeline; use -store to choose one", file=sys.stderr) + return 1 + + deleted_count = 0 + + # STORE/HASH FIRST: folder-store DB deletion (preferred) + if store_name: + backend = None + store_root: Optional[Path] = None + try: + store = Store(config) + backend = store[str(store_name)] + loc = getattr(backend, "location", None) + if callable(loc): + store_root = Path(str(loc())) + except Exception: + backend = None + store_root = None + + if store_root is not None: + try: + with API_folder_store(store_root) as db: + conn = db.connection + if conn is None: + raise RuntimeError("Store DB connection is not initialized") + for single_result in results: + # Enforce same-store when items carry store info + item_store = get_field(single_result, "store") + if item_store and str(item_store) != str(store_name): + log(f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr) + return 1 + + file_hash = _extract_hash(single_result) + if not file_hash: + # Try path -> hash lookup within this store + fp = ( + get_field(single_result, "file_path") + or get_field(single_result, "path") + or get_field(single_result, "target") + ) + if fp: + try: + file_hash = db.get_file_hash(Path(str(fp))) + except Exception: + file_hash = None + if not file_hash: + log("Could not extract file hash for deletion (use -hash or ensure pipeline includes hash)", file=sys.stderr) + return 1 + + meta = db.get_metadata(file_hash) or {} + rels = meta.get("relationships") if isinstance(meta, dict) else None + if not isinstance(rels, dict) or not rels: + continue + + if delete_all_flag: + # remove reverse edges for all types + for rt, hashes in list(rels.items()): + if not isinstance(hashes, list): + continue + for other_hash in hashes: + other_norm = normalize_hash(str(other_hash)) + if other_norm: + _remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt)) + rels = {} + elif rel_type_filter: + # delete one type (case-insensitive key match) + key_to_delete: Optional[str] = None + for k in list(rels.keys()): + if str(k).lower() == str(rel_type_filter).lower(): + key_to_delete = str(k) + break + if not key_to_delete: + continue + hashes = rels.get(key_to_delete) + if isinstance(hashes, list): + for other_hash in hashes: + other_norm = normalize_hash(str(other_hash)) + if other_norm: + _remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete)) + try: + del rels[key_to_delete] + except Exception: + rels[key_to_delete] = [] + else: + log("Specify --all to delete all relationships or -type to delete specific type", file=sys.stderr) + return 1 + + _upsert_relationships(db, file_hash, rels) + conn.commit() + _refresh_relationship_view_if_current(file_hash, None, None, config) + deleted_count += 1 + + log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr) + return 0 + except Exception as exc: + 
log(f"Error deleting store relationships: {exc}", file=sys.stderr) + return 1 + + # LEGACY PATH MODE (single local DB) # Get storage path local_storage_path = get_local_storage_path(config) if not local_storage_path: log("Local storage path not configured", file=sys.stderr) return 1 - - # Normalize input - results = normalize_result_input(result) - - if not results: - log("No results to process", file=sys.stderr) - return 1 - - deleted_count = 0 - - for single_result in results: - try: - # Get file path from result - file_path_from_result = ( - get_field(single_result, "file_path") - or get_field(single_result, "path") - or get_field(single_result, "target") - or (str(single_result) if not isinstance(single_result, dict) else None) - ) - - if not file_path_from_result: - log("Could not extract file path from result", file=sys.stderr) - return 1 - - file_path_obj = Path(str(file_path_from_result)) - - if not file_path_obj.exists(): - log(f"File not found: {file_path_obj}", file=sys.stderr) - return 1 - - with LocalLibrarySearchOptimizer(local_storage_path) as db: - file_id = db.db.get_file_id(file_path_obj) - - if not file_id: + + try: + with API_folder_store(Path(local_storage_path)) as db: + conn = db.connection + if conn is None: + raise RuntimeError("Store DB connection is not initialized") + + for single_result in results: + # Get file path from result + file_path_from_result = ( + get_field(single_result, "file_path") + or get_field(single_result, "path") + or get_field(single_result, "target") + or (str(single_result) if not isinstance(single_result, dict) else None) + ) + + if not file_path_from_result: + log("Could not extract file path from result", file=sys.stderr) + return 1 + + file_path_obj = Path(str(file_path_from_result)) + + if not file_path_obj.exists(): + log(f"File not found: {file_path_obj}", file=sys.stderr) + return 1 + + try: + file_hash = db.get_file_hash(file_path_obj) + except Exception: + file_hash = None + file_hash = normalize_hash(str(file_hash)) if file_hash else None + if not file_hash: log(f"File not in database: {file_path_obj.name}", file=sys.stderr) continue - - # Get current relationships - cursor = db.db.connection.cursor() - cursor.execute(""" - SELECT relationships FROM metadata WHERE file_id = ? 
- """, (file_id,)) - - row = cursor.fetchone() - if not row: - log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr) + + meta = db.get_metadata(file_hash) or {} + rels = meta.get("relationships") if isinstance(meta, dict) else None + if not isinstance(rels, dict) or not rels: continue - - relationships_str = row[0] - if not relationships_str: - log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr) - continue - - try: - relationships = json.loads(relationships_str) - except json.JSONDecodeError: - log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr) - continue - - if not isinstance(relationships, dict): - relationships = {} - - # Determine what to delete + if delete_all_flag: - # Delete all relationships - deleted_types = list(relationships.keys()) - relationships = {} - log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr) + for rt, hashes in list(rels.items()): + if not isinstance(hashes, list): + continue + for other_hash in hashes: + other_norm = normalize_hash(str(other_hash)) + if other_norm: + _remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt)) + rels = {} elif rel_type_filter: - # Delete specific type - if rel_type_filter in relationships: - deleted_count_for_type = len(relationships[rel_type_filter]) - del relationships[rel_type_filter] - log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr) - else: - log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr) + key_to_delete: Optional[str] = None + for k in list(rels.keys()): + if str(k).lower() == str(rel_type_filter).lower(): + key_to_delete = str(k) + break + if not key_to_delete: continue + hashes = rels.get(key_to_delete) + if isinstance(hashes, list): + for other_hash in hashes: + other_norm = normalize_hash(str(other_hash)) + if other_norm: + _remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete)) + try: + del rels[key_to_delete] + except Exception: + rels[key_to_delete] = [] else: log("Specify --all to delete all relationships or -type to delete specific type", file=sys.stderr) return 1 - - # Save updated relationships - cursor.execute(""" - INSERT INTO metadata (file_id, relationships) - VALUES (?, ?) 
-                    ON CONFLICT(file_id) DO UPDATE SET
-                        relationships = excluded.relationships,
-                        time_modified = CURRENT_TIMESTAMP
-                """, (file_id, json.dumps(relationships) if relationships else None))
-
-                db.db.connection.commit()
-                _refresh_relationship_view_if_current(None, str(file_path_obj), None, config)
+
+                _upsert_relationships(db, file_hash, rels)
+                conn.commit()
+                _refresh_relationship_view_if_current(file_hash, str(file_path_obj), None, config)
                 deleted_count += 1
-
-        except Exception as exc:
-            log(f"Error deleting relationship: {exc}", file=sys.stderr)
-            return 1
+    except Exception as exc:
+        log(f"Error deleting relationship: {exc}", file=sys.stderr)
+        return 1
 
     log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
     return 0
@@ -201,7 +369,9 @@ CMDLET = Cmdlet(
     summary="Remove relationships from files.",
     usage="@1 | delete-relationship --all OR delete-relationship -path <path> --all OR @1-3 | delete-relationship -type alt",
     arg=[
-        CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
+        CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
+        SharedArgs.STORE,
+        SharedArgs.HASH,
         CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
         CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
     ],
diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py
index f0b09a3..a2412b1 100644
--- a/cmdlet/download_file.py
+++ b/cmdlet/download_file.py
@@ -215,6 +215,38 @@ class Download_File(Cmdlet):
                         if availability or reason:
                             msg += f" (availability={availability or ''} reason={reason or ''})"
                         log(msg, file=sys.stderr)
+
+                        # Fallback: run a LibGen title search so the user can pick an alternative source.
+                        try:
+                            title_text = str(title or "").strip()
+                            if not title_text and isinstance(full_metadata, dict):
+                                title_text = str(full_metadata.get("title") or "").strip()
+                            if title_text:
+                                log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
+                                from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
+                                # Use plain title text (LibGen mirrors can be finicky with fielded query prefixes).
+                                fallback_query = title_text
+
+                                ret = _SEARCH_PROVIDER_CMDLET.exec(
+                                    None,
+                                    ["-provider", "libgen", "-query", fallback_query],
+                                    config,
+                                )
+
+                                # download-file is treated as an action command by the pipeline printer.
+                                # Promote the search-provider table to a display overlay so it renders.
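+                                # (Assumed pipeline_context behavior: overlay tables are still printed for
+                                # action cmdlets, so the LibGen hits stay selectable with @N downstream.)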
+ try: + table = pipeline_context.get_last_result_table() + items = pipeline_context.get_last_result_items() + if table is not None: + pipeline_context.set_last_result_table_overlay(table, items) + except Exception: + pass + + return int(ret) + except Exception: + pass + continue # Fallback: if we have a direct HTTP URL, download it directly diff --git a/cmdlet/download_media.py b/cmdlet/download_media.py index 7fa8ec9..0b7b1df 100644 --- a/cmdlet/download_media.py +++ b/cmdlet/download_media.py @@ -23,6 +23,7 @@ import re import string import subprocess import sys +import tempfile import time import traceback from typing import Any, Dict, Iterator, List, Optional @@ -274,17 +275,28 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"] if opts.clip_sections: - sections = [] - for section_range in opts.clip_sections.split(','): + sections: List[str] = [] + + def _secs_to_hms(seconds: float) -> str: + total = max(0, int(seconds)) + minutes, secs = divmod(total, 60) + hours, minutes = divmod(minutes, 60) + return f"{hours:02d}:{minutes:02d}:{secs:02d}" + + for section_range in str(opts.clip_sections).split(","): + section_range = section_range.strip() + if not section_range: + continue try: - start_s, end_s = [int(x) for x in section_range.split('-')] - def _secs_to_hms(s: int) -> str: - minutes, seconds = divmod(s, 60) - hours, minutes = divmod(minutes, 60) - return f"{hours:02d}:{minutes:02d}:{seconds:02d}" + start_s_raw, end_s_raw = section_range.split("-", 1) + start_s = float(start_s_raw.strip()) + end_s = float(end_s_raw.strip()) + if start_s >= end_s: + continue sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}") except (ValueError, AttributeError): - pass + continue + if sections: base_options["download_sections"] = sections debug(f"Download sections configured: {', '.join(sections)}") @@ -1150,6 +1162,7 @@ class Download_Media(Cmdlet): CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"), CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"), CmdletArg(name="item", type="string", description="Item selection for playlists/formats"), + SharedArgs.PATH ], detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."], exec=self.run, @@ -1215,11 +1228,11 @@ class Download_Media(Cmdlet): mode = "audio" if parsed.get("audio") else "video" - # Parse clip range if specified - clip_range = None + # Parse clip range(s) if specified + clip_ranges: Optional[List[tuple[int, int]]] = None if clip_spec: - clip_range = self._parse_time_range(clip_spec) - if not clip_range: + clip_ranges = self._parse_time_ranges(str(clip_spec)) + if not clip_ranges: log(f"Invalid clip format: {clip_spec}", file=sys.stderr) return 1 @@ -1738,7 +1751,7 @@ class Download_Media(Cmdlet): # Download each URL downloaded_count = 0 - clip_sections_spec = self._build_clip_sections_spec(clip_range) + clip_sections_spec = self._build_clip_sections_spec(clip_ranges) for url in supported_url: try: @@ -1789,15 +1802,58 @@ class Download_Media(Cmdlet): result_obj = _download_with_timeout(opts, timeout_seconds=300) debug(f"Download completed, building pipe object...") - # Emit one PipeObject per downloaded file (playlists/albums return a list) - results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj] - debug(f"Emitting {len(results_to_emit)} 
result(s) to pipeline...") + # Expand result set: + # - playlists return a list + # - section clips return a single DownloadMediaResult with `paths` populated + results_to_emit: List[Any] = [] + if isinstance(result_obj, list): + results_to_emit = list(result_obj) + else: + paths = getattr(result_obj, "paths", None) + if isinstance(paths, list) and paths: + # Create one DownloadMediaResult per section file + for p in paths: + try: + p_path = Path(p) + except Exception: + continue + if not p_path.exists() or p_path.is_dir(): + continue + try: + hv = sha256_file(p_path) + except Exception: + hv = None + results_to_emit.append( + DownloadMediaResult( + path=p_path, + info=getattr(result_obj, "info", {}) or {}, + tag=list(getattr(result_obj, "tag", []) or []), + source_url=getattr(result_obj, "source_url", None) or opts.url, + hash_value=hv, + ) + ) + else: + results_to_emit = [result_obj] + + # Build PipeObjects first so we can attach cross-clip relationships. + pipe_objects: List[Dict[str, Any]] = [] + for downloaded in results_to_emit: + pipe_objects.append(self._build_pipe_object(downloaded, url, opts)) + + # If this is a clip download, decorate titles/tags so the title: tag is clip-based. + # Relationship tags are only added when multiple clips exist. + try: + if clip_ranges and len(pipe_objects) == len(clip_ranges): + source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available) + self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash) + except Exception: + pass + + debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...") stage_ctx = pipeline_context.get_stage_context() emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)) - for downloaded in results_to_emit: - pipe_obj_dict = self._build_pipe_object(downloaded, url, opts) - + for pipe_obj_dict in pipe_objects: # Only emit when there is a downstream stage. # This keeps `download-media` from producing a result table when run standalone. if emit_enabled: @@ -1808,7 +1864,7 @@ class Download_Media(Cmdlet): pipe_obj = coerce_to_pipe_object(pipe_obj_dict) register_url_with_local_library(pipe_obj, config) - downloaded_count += len(results_to_emit) + downloaded_count += len(pipe_objects) debug("✓ Downloaded and emitted") except DownloadError as e: @@ -1828,62 +1884,120 @@ class Download_Media(Cmdlet): return 1 def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: - """Resolve the output directory from storage location or config.""" - storage_location = parsed.get("storage") + """Resolve the output directory. - # Priority 1: --storage flag - if storage_location: + Rules: + - If user passes `-path`, use that directory (override). + - Otherwise default to a temp directory (config["temp"] if present, else OS temp). + """ + + # Priority 1: explicit output directory override + path_override = parsed.get("path") + if path_override: try: - return SharedArgs.resolve_storage(storage_location) + candidate = Path(str(path_override)).expanduser() + # If user passed a file path, treat its parent as output dir. 
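+                # e.g. a hypothetical `-path D:\media\clip.mp4` resolves to the directory D:\media;
+                # an extensionless value is treated as the output directory itself (the suffix check is a heuristic).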
+ if candidate.suffix: + candidate = candidate.parent + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using output directory override: {candidate}") + return candidate except Exception as e: - log(f"Invalid storage location: {e}", file=sys.stderr) + log(f"Invalid -path output directory: {e}", file=sys.stderr) return None - # Priority 2: Config default output/temp directory + # Priority 2: config-provided temp/output directory try: - from config import resolve_output_dir - final_output_dir = resolve_output_dir(config) + temp_value = (config or {}).get("temp") if isinstance(config, dict) else None except Exception: - final_output_dir = Path.home() / "Videos" - - debug(f"Using default directory: {final_output_dir}") + temp_value = None + if temp_value: + try: + candidate = Path(str(temp_value)).expanduser() + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using config temp directory: {candidate}") + return candidate + except Exception as e: + log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr) + return None + # Priority 3: OS temp fallback try: - final_output_dir.mkdir(parents=True, exist_ok=True) + candidate = Path(tempfile.gettempdir()) / "Medios-Macina" + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using OS temp directory: {candidate}") + return candidate except Exception as e: - log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) + log(f"Cannot create OS temp directory: {e}", file=sys.stderr) return None - return final_output_dir + def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]: + """Parse clip specs into a list of (start_seconds, end_seconds). - def _parse_time_range(self, spec: str) -> Optional[tuple]: - """Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds).""" - try: - parts = spec.split("-") - if len(parts) != 2: + Supported inputs: + - "MM:SS-MM:SS" + - "HH:MM:SS-HH:MM:SS" + - seconds: "280-300" + - multiple ranges separated by commas: "4:40-5:00,5:15-5:25" + """ + + def _to_seconds(ts: str) -> Optional[int]: + ts = str(ts).strip() + if not ts: return None - - def to_seconds(ts: str) -> int: - ts = ts.strip() - if ":" in ts: - mm, ss = ts.split(":") - return int(mm) * 60 + int(ss) - return int(ts) - - start = to_seconds(parts[0]) - end = to_seconds(parts[1]) - return (start, end) if start < end else None - except Exception: - return None + + if ":" in ts: + parts = [p.strip() for p in ts.split(":")] + if len(parts) == 2: + hh_s = "0" + mm_s, ss_s = parts + elif len(parts) == 3: + hh_s, mm_s, ss_s = parts + else: + return None + + try: + hours = int(hh_s) + minutes = int(mm_s) + seconds = float(ss_s) + total = (hours * 3600) + (minutes * 60) + seconds + return int(total) + except Exception: + return None + + try: + return int(float(ts)) + except Exception: + return None + + ranges: List[tuple[int, int]] = [] + if not spec: + return ranges + + for piece in str(spec).split(","): + piece = piece.strip() + if not piece: + continue + if "-" not in piece: + return [] + start_s, end_s = [p.strip() for p in piece.split("-", 1)] + start = _to_seconds(start_s) + end = _to_seconds(end_s) + if start is None or end is None or start >= end: + return [] + ranges.append((start, end)) + + return ranges def _build_clip_sections_spec( self, - clip_range: Optional[tuple], + clip_ranges: Optional[List[tuple[int, int]]], ) -> Optional[str]: """Convert parsed clip range into downloader spec (seconds).""" ranges: List[str] = [] - if clip_range: - ranges.append(f"{clip_range[0]}-{clip_range[1]}") 
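+        # e.g. clip_ranges [(280, 300), (315, 325)] -> "280-300,315-325"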
+ if clip_ranges: + for start_s, end_s in clip_ranges: + ranges.append(f"{start_s}-{end_s}") return ",".join(ranges) if ranges else None def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: @@ -1926,6 +2040,159 @@ class Download_Media(Cmdlet): "media_kind": "video" if opts.mode == "video" else "audio", } + @staticmethod + def _normalise_hash_hex(value: Optional[str]) -> Optional[str]: + if not value or not isinstance(value, str): + return None + candidate = value.strip().lower() + if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate): + return candidate + return None + + @classmethod + def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]: + if not isinstance(hit, dict): + return None + for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"): + v = hit.get(key) + normalized = cls._normalise_hash_hex(str(v) if v is not None else None) + if normalized: + return normalized + return None + + @classmethod + def _find_existing_hash_for_url( + cls, + storage: Any, + canonical_url: str, + *, + hydrus_available: bool, + ) -> Optional[str]: + """Best-effort lookup of an existing stored item hash by url:. + + Used to make the stored source video the king for multi-clip relationships. + """ + if storage is None or not canonical_url: + return None + try: + from Store.HydrusNetwork import HydrusNetwork + except Exception: + HydrusNetwork = None # type: ignore + + try: + backend_names = list(storage.list_searchable_backends() or []) + except Exception: + backend_names = [] + + for backend_name in backend_names: + try: + backend = storage[backend_name] + except Exception: + continue + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + try: + if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available: + continue + except Exception: + pass + + try: + hits = backend.search(f"url:{canonical_url}", limit=5) or [] + except Exception: + hits = [] + for hit in hits: + extracted = cls._extract_hash_from_search_hit(hit) + if extracted: + return extracted + + return None + + @staticmethod + def _format_timecode(seconds: int, *, force_hours: bool) -> str: + total = max(0, int(seconds)) + minutes, secs = divmod(total, 60) + hours, minutes = divmod(minutes, 60) + if force_hours: + return f"{hours:02d}:{minutes:02d}:{secs:02d}" + return f"{minutes:02d}:{secs:02d}" + + @classmethod + def _format_clip_range(cls, start_s: int, end_s: int) -> str: + force_hours = bool(start_s >= 3600 or end_s >= 3600) + return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}" + + @classmethod + def _apply_clip_decorations( + cls, + pipe_objects: List[Dict[str, Any]], + clip_ranges: List[tuple[int, int]], + *, + source_king_hash: Optional[str], + ) -> None: + """Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads. + + - Sets the clip title (and title: tag) to exactly `clip:{range}`. + - Adds `clip:{range}` tag. + - Sets `relationships` on each emitted item (king hash first, then alt hashes) + so downstream can persist relationships into a DB/API without storing relationship tags. + """ + if not pipe_objects or len(pipe_objects) != len(clip_ranges): + return + + # Always apply clip titles/tags (even for a single clip). 
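+        # Illustrative: a clip spanning 4:40-5:00 is titled "clip:04:40-05:00"; its tag list keeps the
+        # existing non-title/non-relationship tags and gains "title:clip:04:40-05:00" plus "clip:04:40-05:00".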
+ for po, (start_s, end_s) in zip(pipe_objects, clip_ranges): + clip_range = cls._format_clip_range(start_s, end_s) + clip_tag = f"clip:{clip_range}" + + # Title: make it generic/consistent for clips. + po["title"] = clip_tag + + tags = po.get("tag") + if not isinstance(tags, list): + tags = [] + + # Replace any existing title: tags with title: + tags = [t for t in tags if not str(t).strip().lower().startswith("title:")] + + # Relationships must not be stored as tags. + tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")] + tags.insert(0, f"title:{clip_tag}") + + # Ensure clip tag exists + if clip_tag not in tags: + tags.append(clip_tag) + + po["tag"] = tags + + # Relationship tagging only makes sense when multiple clips exist. + if len(pipe_objects) < 2: + return + + hashes: List[str] = [] + for po in pipe_objects: + h = cls._normalise_hash_hex(str(po.get("hash") or "")) + hashes.append(h or "") + + # Determine king: prefer an existing source video hash if present; else first clip becomes king. + king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None + if not king_hash: + king_hash = hashes[0] if hashes and hashes[0] else None + if not king_hash: + return + + alt_hashes: List[str] = [h for h in hashes if h and h != king_hash] + if not alt_hashes: + return + + # Carry relationship metadata through the pipeline without using tags. + rel_payload = {"king": [king_hash], "alt": list(alt_hashes)} + for po in pipe_objects: + po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)} + def _compute_file_hash(self, filepath: Path) -> str: """Compute SHA256 hash of a file.""" import hashlib diff --git a/cmdlet/download_torrent.py b/cmdlet/download_torrent.py index 85baa1a..7e5e41c 100644 --- a/cmdlet/download_torrent.py +++ b/cmdlet/download_torrent.py @@ -43,18 +43,105 @@ class Download_Torrent(Cmdlet): output_dir = Path(parsed.get("output") or Path.home() / "Downloads") wait_timeout = int(float(parsed.get("wait", 600))) background_mode = parsed.get("background", False) - api_key = config.get("alldebrid_api_key") + api_key = None + try: + from Provider.alldebrid import _get_debrid_api_key # type: ignore + + api_key = _get_debrid_api_key(config) + except Exception: + api_key = None if not api_key: - log("AllDebrid API key not configured", file=sys.stderr) + log("AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", file=sys.stderr) return 1 for magnet_url in magnet_args: if background_mode: self._start_background_worker(magnet_url, output_dir, config, api_key, wait_timeout) log(f"⧗ Torrent download queued in background: {magnet_url}") else: - self._download_torrent_worker(str(uuid.uuid4()), magnet_url, output_dir, config, api_key, wait_timeout) + # Foreground mode: submit quickly, then continue processing in background + # so we return control to the REPL immediately. + worker_id = str(uuid.uuid4()) + magnet_id = self._submit_magnet(worker_id, magnet_url, api_key) + if magnet_id <= 0: + continue + self._start_background_magnet_worker(worker_id, magnet_id, output_dir, api_key, wait_timeout) + log(f"⧗ Torrent processing started (ID: {magnet_id})") return 0 + @staticmethod + def _submit_magnet(worker_id: str, magnet_url: str, api_key: str) -> int: + """Submit a magnet and return its AllDebrid magnet ID. + + This is intentionally fast so the caller can return to the REPL. 
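+
+        Illustrative usage (hypothetical magnet URI):
+
+            magnet_id = Download_Torrent._submit_magnet(worker_id, "magnet:?xt=urn:btih:...", api_key)
+            if magnet_id > 0:
+                # hand off to _start_background_magnet_worker for polling and downloading
+                ...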
+ """ + try: + from API.alldebrid import AllDebridClient + + client = AllDebridClient(api_key) + log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") + magnet_info = client.magnet_add(magnet_url) + magnet_id = int(magnet_info.get('id', 0)) + if magnet_id <= 0: + log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr) + return 0 + log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") + return magnet_id + except Exception as e: + log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr) + return 0 + + def _start_background_magnet_worker(self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int) -> None: + thread = threading.Thread( + target=self._download_magnet_worker, + args=(worker_id, magnet_id, output_dir, api_key, wait_timeout), + daemon=True, + name=f"TorrentWorker_{worker_id}", + ) + thread.start() + + @staticmethod + def _download_magnet_worker( + worker_id: str, + magnet_id: int, + output_dir: Path, + api_key: str, + wait_timeout: int = 600, + ) -> None: + """Poll AllDebrid magnet status until ready, then download the files.""" + try: + from API.alldebrid import AllDebridClient + + client = AllDebridClient(api_key) + + # Poll for ready status (simplified) + import time + + elapsed = 0 + while elapsed < wait_timeout: + status = client.magnet_status(magnet_id) + if status.get('ready'): + break + time.sleep(5) + elapsed += 5 + if elapsed >= wait_timeout: + log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr) + return + + files_result = client.magnet_links([magnet_id]) + magnet_files = files_result.get(str(magnet_id), {}) + files_array = magnet_files.get('files', []) + if not files_array: + log(f"[Worker {worker_id}] No files found", file=sys.stderr) + return + for file_info in files_array: + file_url = file_info.get('link') + file_name = file_info.get('name') + if file_url and file_name: + Download_Torrent._download_file(file_url, output_dir / file_name) + log(f"[Worker {worker_id}] ✓ Downloaded {file_name}") + except Exception as e: + log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr) + @staticmethod def _download_torrent_worker( worker_id: str, @@ -119,7 +206,7 @@ class Download_Torrent(Cmdlet): thread = threading.Thread( target=self._download_torrent_worker, args=(worker_id, magnet_url, output_dir, config, api_key, wait_timeout), - daemon=False, + daemon=True, name=f"TorrentWorker_{worker_id}", ) thread.start() diff --git a/cmdlet/get_file.py b/cmdlet/get_file.py index a1de5e0..3624742 100644 --- a/cmdlet/get_file.py +++ b/cmdlet/get_file.py @@ -2,10 +2,12 @@ from __future__ import annotations from typing import Any, Dict, Sequence from pathlib import Path +import os import sys import shutil +import subprocess +import webbrowser -from . 
import register import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash from SYS.logger import log, debug @@ -25,8 +27,8 @@ class Get_File(Cmdlet): arg=[ SharedArgs.HASH, SharedArgs.STORE, - CmdletArg("-path", description="Output directory path (default: from config)"), - CmdletArg("-name", description="Output filename (default: from metadata title)"), + SharedArgs.PATH, + CmdletArg("name", description="Output filename (default: from metadata title)"), ], detail=[ "- Exports file from storage backend to local path", @@ -79,32 +81,23 @@ class Get_File(Cmdlet): log(f"Error: File metadata not found for hash {file_hash[:12]}...") return 1 debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}") - - # Determine output filename - if output_name: - filename = output_name - else: - # Use title from metadata, sanitize it - title = metadata.get("title", "export") - filename = self._sanitize_filename(title) - - # Add extension if metadata has it - ext = metadata.get("ext") - if ext and not filename.endswith(ext): - if not ext.startswith('.'): - ext = '.' + ext - filename += ext - - # Determine output directory - if output_path: - output_dir = Path(output_path).expanduser() - else: - output_dir = resolve_output_dir(config) - - debug(f"[get-file] Output dir: {output_dir}") - - # Create output directory - output_dir.mkdir(parents=True, exist_ok=True) + + def resolve_display_title() -> str: + candidates = [ + get_field(result, "title"), + get_field(result, "name"), + get_field(result, "filename"), + (metadata.get("title") if isinstance(metadata, dict) else None), + (metadata.get("name") if isinstance(metadata, dict) else None), + (metadata.get("filename") if isinstance(metadata, dict) else None), + ] + for candidate in candidates: + if candidate is None: + continue + text = str(candidate).strip() + if text: + return text + return "" debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)") @@ -115,15 +108,20 @@ class Get_File(Cmdlet): # Check if backend returned a URL (HydrusNetwork case) if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")): - log(f"File opened in browser: {source_path}", file=sys.stderr) - ctx.emit(f"Opened in browser: {source_path}") + # Hydrus backend returns a URL; open it only for this explicit user action. + try: + webbrowser.open(source_path) + except Exception as exc: + log(f"Error opening browser: {exc}", file=sys.stderr) + else: + log(f"Opened in browser: {source_path}", file=sys.stderr) # Emit result for pipeline ctx.emit({ "hash": file_hash, "store": store_name, "url": source_path, - "title": filename, + "title": resolve_display_title() or "Opened", }) return 0 @@ -131,32 +129,58 @@ class Get_File(Cmdlet): if isinstance(source_path, str): source_path = Path(source_path) - # Determine output directory + if not source_path or not source_path.exists(): + log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...") + return 1 + + # Folder store UX: without -path, just open the file in the default app. + # Only export/copy when -path is explicitly provided. 
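+        # e.g. for a folder store, `@1 | get-file` just opens the stored file with the OS default app,
+        # while `@1 | get-file -path D:\exports` (hypothetical path) copies it out instead.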
+ backend_name = type(backend).__name__ + is_folder_backend = backend_name.lower() == "folder" + if is_folder_backend and not output_path: + display_title = resolve_display_title() or source_path.stem or "Opened" + ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".") + self._open_file_default(source_path) + log(f"Opened: {source_path}", file=sys.stderr) + ctx.emit({ + "hash": file_hash, + "store": store_name, + "path": str(source_path), + "title": str(display_title), + "ext": str(ext_for_emit or ""), + }) + debug("[get-file] Completed successfully") + return 0 + + # Otherwise: export/copy to output_dir. if output_path: output_dir = Path(output_path).expanduser() else: output_dir = resolve_output_dir(config) - + debug(f"[get-file] Output dir: {output_dir}") - - # Create output directory output_dir.mkdir(parents=True, exist_ok=True) - - # Build full output path - dest_path = output_dir / filename - - # Make path unique if file exists - dest_path = self._unique_path(dest_path) - - if not source_path or not source_path.exists(): - log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...") - return 1 + + # Determine output filename (only when exporting) + if output_name: + filename = output_name + else: + title = (metadata.get("title") if isinstance(metadata, dict) else None) or resolve_display_title() or "export" + filename = self._sanitize_filename(title) + + # Add extension if metadata has it + ext = metadata.get("ext") + if ext and not filename.endswith(ext): + if not ext.startswith('.'): + ext = '.' + ext + filename += ext + + dest_path = self._unique_path(output_dir / filename) # Copy file to destination debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr) shutil.copy2(source_path, dest_path) - - ctx.emit(f"Exported to: {dest_path}") + log(f"Exported: {dest_path}", file=sys.stderr) # Emit result for pipeline @@ -169,6 +193,19 @@ class Get_File(Cmdlet): debug(f"[get-file] Completed successfully") return 0 + + def _open_file_default(self, path: Path) -> None: + """Open a local file in the OS default application.""" + try: + if sys.platform.startswith("win"): + os.startfile(str(path)) # type: ignore[attr-defined] + return + if sys.platform == "darwin": + subprocess.Popen(["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return + subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except Exception as exc: + log(f"Error opening file: {exc}", file=sys.stderr) def _sanitize_filename(self, name: str) -> str: """Sanitize filename by removing invalid characters.""" diff --git a/cmdlet/get_relationship.py b/cmdlet/get_relationship.py index 9e2f5f8..1855eb5 100644 --- a/cmdlet/get_relationship.py +++ b/cmdlet/get_relationship.py @@ -10,10 +10,11 @@ from SYS.logger import log import models import pipeline as ctx from API import HydrusNetwork as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field from API.folder import API_folder_store from config import get_local_storage_path from result_table import ResultTable +from Store import Store CMDLET = Cmdlet( name="get-relationship", @@ -24,6 +25,7 @@ CMDLET = Cmdlet( ], arg=[ SharedArgs.HASH, + SharedArgs.STORE, ], detail=[ "- Lists relationship data as returned by Hydrus 
or Local DB.", @@ -36,8 +38,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 - # Parse -hash override + # Parse -hash and -store override override_hash: str | None = None + override_store: str | None = None args_list = list(_args) i = 0 while i < len(args_list): @@ -46,11 +49,20 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list): override_hash = str(args_list[i + 1]).strip() break + if low in {"-store", "--store", "store"} and i + 1 < len(args_list): + override_store = str(args_list[i + 1]).strip() i += 1 - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] + # Handle @N selection which creates a list + # This cmdlet is single-subject; require disambiguation when multiple items are provided. + if isinstance(result, list): + if len(result) == 0: + result = None + elif len(result) > 1 and not override_hash: + log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr) + return 1 + else: + result = result[0] # Initialize results collection found_relationships = [] # List of dicts: {hash, type, title, path, store} @@ -65,256 +77,170 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: return found_relationships.append(entry) - # Check for local file first - file_path = None - if isinstance(result, dict): - file_path = result.get("file_path") or result.get("path") - source_title = result.get("title") or result.get("name") or "Unknown" - elif hasattr(result, "file_path"): - file_path = result.file_path - source_title = getattr(result, "title", "Unknown") - - local_db_checked = False - - if file_path and not override_hash: - try: - path_obj = Path(file_path) - if not source_title or source_title == "Unknown": - source_title = path_obj.name - - print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr) - print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr) - - if path_obj.exists(): - storage_path = get_local_storage_path(config) - print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr) - if storage_path: - with API_folder_store(storage_path) as db: - file_hash = db.get_file_hash(path_obj) - metadata = db.get_metadata(file_hash) if file_hash else None - print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr) - if metadata and metadata.get("relationships"): - local_db_checked = True - rels = metadata["relationships"] - print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr) - if isinstance(rels, dict): - for rel_type, hashes in rels.items(): - print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr) - if hashes: - for h in hashes: - # h is now a file hash (not a path) - print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr) - # Resolve hash to file path - resolved_path = db.search_hash(h) - title = h[:16] + "..." 
- path = None - if resolved_path and resolved_path.exists(): - path = str(resolved_path) - # Try to get title from tags - try: - tags = db.get_tags(h) - found_title = False - for t in tags: - if t.lower().startswith('title:'): - title = t[6:].strip() - found_title = True - break - if not found_title: - title = resolved_path.stem - except Exception: - title = resolved_path.stem - - entry_type = "king" if rel_type.lower() == "alt" else rel_type - _add_relationship({ - "hash": h, - "type": entry_type, - "title": title, - "path": path, - "store": "local" - }) - - # RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king), - # then we should look up the king's other alts to show siblings. - # NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating - # the king's direct relationships with its alts. - print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr) - if rel_type.lower() == "alt" and path: - print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr) - try: - parent_path_obj = Path(path) - print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr) - - # Also add the king/parent itself if not already in results - existing_parent = None - for r in found_relationships: - if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower(): - existing_parent = r - break - if not existing_parent: - parent_title = parent_path_obj.stem - try: - parent_hash = db.get_file_hash(parent_path_obj) - if parent_hash: - parent_tags = db.get_tags(parent_hash) - for t in parent_tags: - if t.lower().startswith('title:'): - parent_title = t[6:].strip() - break - except Exception: - pass - - print(f"[DEBUG] ➕ Adding king/parent to results: {parent_title}", file=sys.stderr) - _add_relationship({ - "hash": str(path), - "type": "king" if rel_type.lower() == "alt" else rel_type, - "title": parent_title, - "path": str(path), - "store": "local" - }) - else: - # If already in results, ensure it's marked as king if appropriate - if rel_type.lower() == "alt": - existing_parent['type'] = "king" - - # 1. Check forward relationships from parent (siblings) - parent_hash = db.get_file_hash(parent_path_obj) - parent_metadata = db.get_metadata(parent_hash) if parent_hash else None - print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr) - if parent_metadata: - print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr) - if parent_metadata and parent_metadata.get("relationships"): - parent_rels = parent_metadata["relationships"] - print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr) - if isinstance(parent_rels, dict): - for child_type, child_hashes in parent_rels.items(): - print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr) - if child_hashes: - for child_h in child_hashes: - # child_h is now a HASH, not a path - resolve it - child_path_obj = db.search_hash(child_h) - print(f"[DEBUG] Resolved hash {child_h[:16]}... 
to: {child_path_obj}", file=sys.stderr) - - if not child_path_obj: - # Hash doesn't resolve - skip it - print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr) - continue - - # Check if already added (case-insensitive hash/path check) - if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships): - print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr) - continue - - # Now child_path_obj is a Path, so we can get tags - child_title = child_path_obj.stem - try: - child_hash = db.get_file_hash(child_path_obj) - if child_hash: - child_tags = db.get_tags(child_hash) - for t in child_tags: - if t.lower().startswith('title:'): - child_title = t[6:].strip() - break - except Exception: - pass - - print(f"[DEBUG] ➕ Adding sibling: {child_title}", file=sys.stderr) - _add_relationship({ - "hash": child_h, - "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", - "title": child_title, - "path": str(child_path_obj), - "store": "local" - }) - else: - print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr) - - # 2. Check reverse relationships pointing TO parent (siblings via reverse lookup) - # This handles the case where siblings point to parent but parent doesn't point to siblings - reverse_children = db.find_files_pointing_to(parent_path_obj) - print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr) - for child in reverse_children: - child_path = child['path'] - child_type = child['type'] - print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr) - - # Skip if already added (check by path/hash, case-insensitive) - if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships): - print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr) - continue - - child_path_obj = Path(child_path) - child_title = child_path_obj.stem - try: - child_hash = db.get_file_hash(child_path_obj) - if child_hash: - child_tags = db.get_tags(child_hash) - for t in child_tags: - if t.lower().startswith('title:'): - child_title = t[6:].strip() - break - except Exception: - pass - - print(f"[DEBUG] ➕ Adding reverse sibling: {child_title}", file=sys.stderr) - _add_relationship({ - "hash": child_path, - "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", - "title": child_title, - "path": child_path, - "store": "local" - }) - - except Exception as e: - print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - - except Exception as e: - log(f"Recursive lookup error: {e}", file=sys.stderr) + # Store/hash-first subject resolution + store_name: Optional[str] = override_store + if not store_name: + store_name = get_field(result, "store") + + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result)) + + if not source_title or source_title == "Unknown": + source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown") + + local_db_checked = False + + if store_name and hash_hex: + try: + store = Store(config) + backend = store[str(store_name)] + + # Folder store relationships + # IMPORTANT: only treat the Folder backend as a local DB store. 
+ # Other backends may expose a location() method but are not SQLite folder stores. + if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")): + storage_path = Path(str(backend.location())) + with API_folder_store(storage_path) as db: + local_db_checked = True + + # Update source title from tags if possible + try: + tags = db.get_tags(hash_hex) + for t in tags: + if isinstance(t, str) and t.lower().startswith("title:"): + source_title = t[6:].strip() + break + except Exception: + pass + + metadata = db.get_metadata(hash_hex) + rels = (metadata or {}).get("relationships") + king_hashes: list[str] = [] + + # Forward relationships + if isinstance(rels, dict): + for rel_type, hashes in rels.items(): + if not isinstance(hashes, list): + continue + for related_hash in hashes: + related_hash = normalize_hash(str(related_hash)) + if not related_hash or related_hash == hash_hex: + continue + + entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type) + if entry_type == "king": + king_hashes.append(related_hash) + + related_title = related_hash[:16] + "..." + try: + rel_tags = db.get_tags(related_hash) + for t in rel_tags: + if isinstance(t, str) and t.lower().startswith("title:"): + related_title = t[6:].strip() + break + except Exception: + pass + + _add_relationship({ + "hash": related_hash, + "type": entry_type, + "title": related_title, + "path": None, + "store": str(store_name), + }) + + # Reverse relationships (alts pointing to this hash) + try: + reverse_children = db.find_files_pointing_to_hash(hash_hex) + except Exception: + reverse_children = [] + + for child in reverse_children or []: + child_hash = normalize_hash(str(child.get("hash") or "")) + rel_type = str(child.get("type") or "").strip().lower() + if not child_hash or child_hash == hash_hex: + continue + + child_title = child_hash[:16] + "..." + try: + child_tags = db.get_tags(child_hash) + for t in child_tags: + if isinstance(t, str) and t.lower().startswith("title:"): + child_title = t[6:].strip() + break + except Exception: + pass + + entry_type = "alt" if rel_type == "alt" else (rel_type or "related") + _add_relationship({ + "hash": child_hash, + "type": entry_type, + "title": child_title, + "path": None, + "store": str(store_name), + }) + + # Siblings (alts that share the same king) + for king_hash in king_hashes: + try: + siblings = db.find_files_pointing_to_hash(king_hash) + except Exception: + siblings = [] + for sib in siblings or []: + sib_hash = normalize_hash(str(sib.get("hash") or "")) + sib_type = str(sib.get("type") or "").strip().lower() + if not sib_hash or sib_hash in {hash_hex, king_hash}: + continue + + sib_title = sib_hash[:16] + "..." + try: + sib_tags = db.get_tags(sib_hash) + for t in sib_tags: + if isinstance(t, str) and t.lower().startswith("title:"): + sib_title = t[6:].strip() + break + except Exception: + pass + + entry_type = "alt" if sib_type == "alt" else (sib_type or "related") + _add_relationship({ + "hash": sib_hash, + "type": entry_type, + "title": sib_title, + "path": None, + "store": str(store_name), + }) - - # ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE - # NOTE: This is now handled via recursive lookup above, which finds siblings through the parent. - # We keep this disabled to avoid adding the same relationships twice. - # If needed in future, can be re-enabled with better deduplication. 
- # for rev in reverse_rels: - # rev_path = rev['path'] - # rev_type = rev['type'] - # - # if any(r['hash'] == rev_path for r in found_relationships): continue - # - # rev_path_obj = Path(rev_path) - # rev_title = rev_path_obj.stem - # try: - # rev_tags = db.get_tags(rev_path_obj) - # for t in rev_tags: - # if t.lower().startswith('title:'): - # rev_title = t[6:].strip(); break - # except Exception: pass - # - # # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject' - # # But we'll just list them with the relationship type they used - # found_relationships.append({ - # "hash": rev_path, - # "type": f"reverse-{rev_type}", # e.g. reverse-alt - # "title": rev_title, - # "path": rev_path, - # "store": "local" - # }) - except Exception as e: - log(f"Error checking local relationships: {e}", file=sys.stderr) + log(f"Error checking store relationships: {e}", file=sys.stderr) # If we found local relationships, we can stop or merge with Hydrus? # For now, if we found local ones, let's show them. # But if the file is also in Hydrus, we might want those too. # Let's try Hydrus if we have a hash. - hash_hex = get_hash_for_operation(override_hash, result) + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result)) if hash_hex and not local_db_checked: try: - client = hydrus_wrapper.get_client(config) + client = None + store_label = "hydrus" + if store_name: + # Store specified: do not fall back to a global/default Hydrus client. + store_label = str(store_name) + try: + store = Store(config) + backend = store[str(store_name)] + candidate = getattr(backend, "_client", None) + if candidate is not None and hasattr(candidate, "get_file_relationships"): + client = candidate + except Exception: + client = None + if client is None: + log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr) + return 1 + else: + client = hydrus_wrapper.get_client(config) + if client: rel = client.get_file_relationships(hash_hex) if rel: @@ -322,38 +248,66 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: this_file_rels = file_rels.get(hash_hex) if this_file_rels: - # Map Hydrus relationship IDs to names - # 0: potential duplicates, 1: false positives, 2: false positives (alternates), - # 3: duplicates, 4: alternatives, 8: king - # This mapping is approximate based on Hydrus API docs/behavior + # Map Hydrus relationship IDs to names. + # For /manage_file_relationships/get_file_relationships, the Hydrus docs define: + # 0=potential duplicates, 1=false positives, 3=alternates, 8=duplicates + # Additionally, this endpoint includes metadata keys like 'king'/'is_king'. rel_map = { - "0": "potential duplicate", + "0": "potential", "1": "false positive", - "2": "false positive", - "3": "duplicate", - "4": "alternative", - "8": "king" + "3": "alternate", + "8": "duplicate", } - - for rel_type_id, hash_list in this_file_rels.items(): - # Skip metadata keys - if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}: + + for rel_type_id, rel_value in this_file_rels.items(): + key = str(rel_type_id) + + # Handle metadata keys explicitly. 
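+                        # The per-file payload mixes hash lists keyed by enum id with metadata keys,
+                        # roughly (per Hydrus docs): {"is_king": true, "king": "<hash>", "0": [...], "3": [...], "8": [...]}.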
+ if key in {"is_king", "king_is_on_file_domain", "king_is_local"}: continue - - rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}") - - if isinstance(hash_list, list): - for rel_hash in hash_list: - if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex: - # Check if we already have this hash from local DB - if not any(r['hash'] == rel_hash for r in found_relationships): - found_relationships.append({ - "hash": rel_hash, - "type": rel_name, - "title": rel_hash, # Can't resolve title easily without another API call - "path": None, - "store": "hydrus" - }) + + # Some Hydrus responses provide a direct king hash under the 'king' key. + if key == "king": + king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None + if king_hash and king_hash != hash_hex: + if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships): + found_relationships.append({ + "hash": king_hash, + "type": "king", + "title": king_hash, + "path": None, + "store": store_label, + }) + continue + + rel_name = rel_map.get(key, f"type-{key}") + + # The relationship value is typically a list of hashes. + if isinstance(rel_value, list): + for rel_hash in rel_value: + rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None + if not rel_hash_norm or rel_hash_norm == hash_hex: + continue + if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships): + found_relationships.append({ + "hash": rel_hash_norm, + "type": rel_name, + "title": rel_hash_norm, # Can't resolve title easily without another API call + "path": None, + "store": store_label, + }) + # Defensive: sometimes the API may return a single hash string. + elif isinstance(rel_value, str): + rel_hash_norm = normalize_hash(rel_value) + if rel_hash_norm and rel_hash_norm != hash_hex: + if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships): + found_relationships.append({ + "hash": rel_hash_norm, + "type": rel_name, + "title": rel_hash_norm, + "path": None, + "store": store_label, + }) except Exception as exc: # Only log error if we didn't find local relationships either if not found_relationships: @@ -374,7 +328,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: return 0 elif t == 'derivative': return 1 - elif t == 'alternative': + elif t in {'alternative', 'alternate', 'alt'}: return 2 elif t == 'duplicate': return 3 @@ -400,22 +354,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "relationship_type": item['type'], "store": item['store'] } - if item['path']: - res_obj["path"] = item['path'] - res_obj["file_path"] = item['path'] - res_obj["target"] = item['path'] - else: - # If Hydrus, target is hash - res_obj["target"] = item['hash'] + # Target is always hash in store/hash-first mode + res_obj["target"] = item['hash'] pipeline_results.append(res_obj) # Set selection args # If it has a path, we can use it directly. If hash, maybe get-file -hash? 
- if item['path']: - table.set_row_selection_args(i, [item['path']]) - else: - table.set_row_selection_args(i, ["-hash", item['hash']]) + table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']]) ctx.set_last_result_table(table, pipeline_results) print(table) diff --git a/cmdlet/merge_file.py b/cmdlet/merge_file.py index 226dd1b..fef247f 100644 --- a/cmdlet/merge_file.py +++ b/cmdlet/merge_file.py @@ -27,10 +27,10 @@ from ._shared import ( import pipeline as ctx try: - from PyPDF2 import PdfWriter, PdfReader - HAS_PYPDF2 = True + from pypdf import PdfWriter, PdfReader + HAS_PYPDF = True except ImportError: - HAS_PYPDF2 = False + HAS_PYPDF = False PdfWriter = None PdfReader = None @@ -753,8 +753,8 @@ def _merge_text(files: List[Path], output: Path) -> bool: def _merge_pdf(files: List[Path], output: Path) -> bool: """Merge PDF files.""" - if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None): - log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr) + if (not HAS_PYPDF) or (PdfWriter is None) or (PdfReader is None): + log("pypdf is required for PDF merging. Install with: pip install pypdf", file=sys.stderr) return False try: diff --git a/cmdlet/screen_shot.py b/cmdlet/screen_shot.py index 97d3e25..69aaf99 100644 --- a/cmdlet/screen_shot.py +++ b/cmdlet/screen_shot.py @@ -10,6 +10,7 @@ import contextlib import hashlib import sys import time +from datetime import datetime import httpx from dataclasses import dataclass, field from pathlib import Path @@ -547,7 +548,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # ======================================================================== piped_results = normalize_result_input(result) - url_to_process = [] + url_to_process: List[Tuple[str, Any]] = [] # Extract url from piped results if piped_results: @@ -559,17 +560,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ) if url: - url_to_process.append(str(url)) + url_to_process.append((str(url), item)) # Use positional arguments if no pipeline input if not url_to_process and positional_url: - url_to_process = positional_url + url_to_process = [(u, None) for u in positional_url] if not url_to_process: log(f"No url to process for screen-shot cmdlet", file=sys.stderr) return 1 - debug(f"[_run] url to process: {url_to_process}") + debug(f"[_run] url to process: {[u for u, _ in url_to_process]}") # ======================================================================== # OUTPUT DIRECTORY RESOLUTION - Priority chain @@ -623,7 +624,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # PROCESS url AND CAPTURE SCREENSHOTS # ======================================================================== - for url in url_to_process: + def _extract_item_tags(item: Any) -> List[str]: + if item is None: + return [] + raw = get_field(item, 'tag') + if isinstance(raw, list): + return [str(t) for t in raw if t is not None and str(t).strip()] + if isinstance(raw, str) and raw.strip(): + return [raw.strip()] + return [] + + def _extract_item_title(item: Any) -> str: + if item is None: + return "" + for key in ("title", "name", "filename"): + val = get_field(item, key) + if val is None: + continue + text = str(val).strip() + if text: + return text + return "" + + def _clean_title(text: str) -> str: + value = (text or "").strip() + if value.lower().startswith("screenshot:"): + value = value.split(":", 1)[1].strip() + return value + + for url, origin_item in 
url_to_process: # Validate URL format if not url.lower().startswith(("http://", "https://", "file://")): log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr) @@ -660,15 +689,34 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: pass # Create PipeObject result - marked as TEMP since derivative artifact + capture_date = "" + try: + capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat() + except Exception: + capture_date = datetime.now().date().isoformat() + + upstream_title = _clean_title(_extract_item_title(origin_item)) + display_title = upstream_title or url + + upstream_tags = _extract_item_tags(origin_item) + filtered_upstream_tags = [ + t for t in upstream_tags + if not str(t).strip().lower().startswith(("type:", "date:")) + ] + merged_tags = unique_preserve_order( + ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + ) + pipe_obj = create_pipe_object_result( source='screenshot', identifier=Path(screenshot_result.path).stem, file_path=str(screenshot_result.path), cmdlet_name='screen-shot', - title=f"Screenshot: {Path(screenshot_result.path).name}", + title=display_title, hash_value=screenshot_hash, is_temp=True, parent_hash=hashlib.sha256(url.encode()).hexdigest(), + tag=merged_tags, extra={ 'source_url': url, 'archive_url': screenshot_result.archive_url, diff --git a/cmdlet/search_provider.py b/cmdlet/search_provider.py index 70e7aac..29c52d8 100644 --- a/cmdlet/search_provider.py +++ b/cmdlet/search_provider.py @@ -1,4 +1,4 @@ -"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube).""" +"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid).""" from __future__ import annotations from typing import Any, Dict, List, Sequence, Optional @@ -25,23 +25,27 @@ class Search_Provider(Cmdlet): def __init__(self): super().__init__( name="search-provider", - summary="Search external providers (bandcamp, libgen, soulseek, youtube)", - usage="search-provider [-limit N]", + summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)", + usage="search-provider -provider [-limit N] [-open ID]", arg=[ - CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"), + CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"), CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"), CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"), + CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"), ], detail=[ "Search external content providers:", + "- alldebrid: List your AllDebrid account folders (magnets). 
Select @N to view files.", + " Example: search-provider -provider alldebrid \"*\"", + " Example: search-provider -provider alldebrid -open 123 \"*\"", "- bandcamp: Search for music albums/tracks", - " Example: search-provider bandcamp \"artist:altrusian grace\"", + " Example: search-provider -provider bandcamp \"artist:altrusian grace\"", "- libgen: Search Library Genesis for books", - " Example: search-provider libgen \"python programming\"", + " Example: search-provider -provider libgen \"python programming\"", "- soulseek: Search P2P network for music", - " Example: search-provider soulseek \"pink floyd\"", + " Example: search-provider -provider soulseek \"pink floyd\"", "- youtube: Search YouTube for videos", - " Example: search-provider youtube \"tutorial\"", + " Example: search-provider -provider youtube \"tutorial\"", "", "Query syntax:", "- bandcamp: Use 'artist:Name' to search by artist", @@ -50,7 +54,7 @@ class Search_Provider(Cmdlet): "- youtube: Plain text search", "", "Results can be piped to other cmdlet:", - " search-provider bandcamp \"artist:grace\" | @1 | download-data", + " search-provider -provider bandcamp \"artist:grace\" | @1 | download-data", ], exec=self.run ) @@ -61,10 +65,62 @@ class Search_Provider(Cmdlet): if should_show_help(args): ctx.emit(self.__dict__) return 0 - - # Parse arguments - if len(args) < 2: - log("Error: search-provider requires and arguments", file=sys.stderr) + + args_list = [str(a) for a in (args or [])] + + # Dynamic flag variants from cmdlet arg definitions. + flag_registry = self.build_flag_registry() + provider_flags = {f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})} + query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})} + limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})} + open_flags = {f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})} + + provider_name: Optional[str] = None + query: Optional[str] = None + limit = 50 + open_id: Optional[int] = None + positionals: List[str] = [] + + i = 0 + while i < len(args_list): + token = args_list[i] + low = token.lower() + if low in provider_flags and i + 1 < len(args_list): + provider_name = args_list[i + 1] + i += 2 + elif low in query_flags and i + 1 < len(args_list): + query = args_list[i + 1] + i += 2 + elif low in limit_flags and i + 1 < len(args_list): + try: + limit = int(args_list[i + 1]) + except ValueError: + log(f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50", file=sys.stderr) + limit = 50 + i += 2 + elif low in open_flags and i + 1 < len(args_list): + try: + open_id = int(args_list[i + 1]) + except ValueError: + log(f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr) + open_id = None + i += 2 + elif not token.startswith("-"): + positionals.append(token) + i += 1 + else: + i += 1 + + # Backwards-compatible positional form: search-provider + if provider_name is None and positionals: + provider_name = positionals[0] + positionals = positionals[1:] + + if query is None and positionals: + query = " ".join(positionals).strip() or None + + if not provider_name or not query: + log("Error: search-provider requires a provider and query", file=sys.stderr) log(f"Usage: {self.usage}", file=sys.stderr) log("Available providers:", file=sys.stderr) providers = list_search_providers(config) @@ -73,17 +129,6 @@ class Search_Provider(Cmdlet): log(f" {status} {name}", file=sys.stderr) return 1 - provider_name = args[0] - query 
= args[1] - - # Parse optional limit - limit = 50 - if len(args) >= 4 and args[2] in ("-limit", "--limit"): - try: - limit = int(args[3]) - except ValueError: - log(f"Warning: Invalid limit value '{args[3]}', using default 50", file=sys.stderr) - debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}") # Get provider @@ -120,13 +165,34 @@ class Search_Provider(Cmdlet): importlib.reload(result_table) from result_table import ResultTable - table_title = f"Search: {query} [{provider_name}]" + provider_text = str(provider_name or "").strip() + provider_lower = provider_text.lower() + if provider_lower == "youtube": + provider_label = "Youtube" + elif provider_lower == "openlibrary": + provider_label = "OpenLibrary" + else: + provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider" + + if provider_lower == "alldebrid" and open_id is not None: + table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":") + else: + table_title = f"{provider_label}: {query}".strip().rstrip(":") preserve_order = provider_name.lower() in ('youtube', 'openlibrary') table = ResultTable(table_title).set_preserve_order(preserve_order) table.set_table(provider_name) + table.set_source_command("search-provider", list(args)) debug(f"[search-provider] Calling {provider_name}.search()") - results = provider.search(query, limit=limit) + if provider_lower == "alldebrid": + if open_id is not None: + # Second-stage: show files for selected folder/magnet. + results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id}) + else: + # Default: show folders (magnets) so user can select @N. + results = provider.search(query, limit=limit, filters={"view": "folders"}) + else: + results = provider.search(query, limit=limit) debug(f"[search-provider] Got {len(results)} results") if not results: @@ -143,11 +209,26 @@ class Search_Provider(Cmdlet): if 'table' not in item_dict: item_dict['table'] = provider_name + row_index = len(table.rows) table.add_result(search_result) # ResultTable handles SearchResult objects + + # For AllDebrid folder rows, allow @N to open and show files. + try: + if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder": + magnet_id = None + meta = getattr(search_result, "full_metadata", None) + if isinstance(meta, dict): + magnet_id = meta.get("magnet_id") + if magnet_id is not None: + table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"]) + except Exception: + pass results_list.append(item_dict) ctx.emit(item_dict) ctx.set_last_result_table(table, results_list) + # Ensure @N selection expands against this newly displayed table. 
+ ctx.set_current_stage_table(table) db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) db.update_worker_status(worker_id, 'completed') @@ -165,5 +246,8 @@ class Search_Provider(Cmdlet): return 1 -# Register cmdlet instance -Search_Provider_Instance = Search_Provider() +# Register cmdlet instance (catalog + REPL autocomplete expects module-level CMDLET) +CMDLET = Search_Provider() + +# Backwards-compatible alias +Search_Provider_Instance = CMDLET diff --git a/cmdlet/search_store.py b/cmdlet/search_store.py index d3081f5..4b05875 100644 --- a/cmdlet/search_store.py +++ b/cmdlet/search_store.py @@ -1,7 +1,7 @@ """Search-store cmdlet: Search for files in storage backends (Folder, Hydrus).""" from __future__ import annotations -from typing import Any, Dict, Sequence, List, Optional, Tuple +from typing import Any, Dict, Sequence, List, Optional from pathlib import Path from collections import OrderedDict import re @@ -10,7 +10,7 @@ import sys from SYS.logger import log, debug -from ._shared import Cmdlet, CmdletArg, get_field, should_show_help, normalize_hash, first_title_tag +from ._shared import Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag import pipeline as ctx @@ -24,21 +24,16 @@ class Search_Store(Cmdlet): super().__init__( name="search-store", summary="Search storage backends (Folder, Hydrus) for files.", - usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]", + usage="search-store [query] [-store BACKEND] [-limit N]", arg=[ CmdletArg("query", description="Search query string"), - CmdletArg("tag", description="Filter by tag (can be used multiple times)"), - CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), - CmdletArg("type", description="Filter by type: audio, video, image, document"), - CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), CmdletArg("limit", type="integer", description="Limit results (default: 100)"), - CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"), + SharedArgs.STORE, ], detail=[ "Search across storage backends: Folder stores and Hydrus instances", "Use -store to search a specific backend by name", "URL search: url:* (any URL) or url: (URL substring)", - "Filter results by: tag, size, type, duration", "Results include hash for downstream commands (get-file, add-tag, etc.)", "Examples:", "search-store foo # Search all storage backends", @@ -46,8 +41,6 @@ class Search_Store(Cmdlet): "search-store -store test 'video' # Search 'test' folder store", "search-store 'url:*' # Files that have any URL", "search-store 'url:youtube.com' # Files whose URL contains substring", - "search-store song -type audio # Search for audio files", - "search-store movie -tag action # Search with tag filter", ], exec=self.run, ) @@ -136,12 +129,14 @@ class Search_Store(Cmdlet): args_list = [str(arg) for arg in (args or [])] + # Build dynamic flag variants from cmdlet arg definitions. + # This avoids hardcoding flag spellings in parsing loops. 
+ flag_registry = self.build_flag_registry() + store_flags = {f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})} + limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})} + # Parse arguments query = "" - _tag_filters: List[str] = [] - _size_filter: Optional[Tuple[str, int]] = None - _duration_filter: Optional[Tuple[str, float]] = None - _type_filter: Optional[str] = None storage_backend: Optional[str] = None limit = 100 searched_backends: List[str] = [] @@ -150,21 +145,15 @@ class Search_Store(Cmdlet): while i < len(args_list): arg = args_list[i] low = arg.lower() - if low in {"-store", "--store"} and i + 1 < len(args_list): + if low in store_flags and i + 1 < len(args_list): storage_backend = args_list[i + 1] i += 2 - elif low in {"-tag", "--tag"} and i + 1 < len(args_list): - _tag_filters.append(args_list[i + 1]) - i += 2 - elif low in {"-limit", "--limit"} and i + 1 < len(args_list): + elif low in limit_flags and i + 1 < len(args_list): try: limit = int(args_list[i + 1]) except ValueError: limit = 100 i += 2 - elif low in {"-type", "--type"} and i + 1 < len(args_list): - _type_filter = args_list[i + 1].lower() - i += 2 elif not arg.startswith("-"): query = f"{query} {arg}".strip() if query else arg i += 1 @@ -268,12 +257,16 @@ class Search_Store(Cmdlet): # Resolve a path/URL string if possible path_str: Optional[str] = None + # IMPORTANT: avoid calling get_file() for remote backends. + # For Hydrus, get_file() returns a browser URL (and may include access keys), + # which should not be pulled during search/refresh. try: - maybe_path = resolved_backend.get_file(h) - if isinstance(maybe_path, Path): - path_str = str(maybe_path) - elif isinstance(maybe_path, str) and maybe_path: - path_str = maybe_path + if type(resolved_backend).__name__ == "Folder": + maybe_path = resolved_backend.get_file(h) + if isinstance(maybe_path, Path): + path_str = str(maybe_path) + elif isinstance(maybe_path, str) and maybe_path: + path_str = maybe_path except Exception: path_str = None diff --git a/cmdnat/matrix.py b/cmdnat/matrix.py index d2fa280..0eab195 100644 --- a/cmdnat/matrix.py +++ b/cmdnat/matrix.py @@ -1,29 +1,436 @@ -from typing import Any, Dict, Sequence, List +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence import sys -from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args +import tempfile +import re +import uuid +from urllib.parse import parse_qs, urlparse + +from cmdlet._shared import Cmdlet, CmdletArg from SYS.logger import log, debug from result_table import ResultTable -# REFACTOR: Commenting out Matrix import until provider refactor is complete -from config import save_config, load_config import pipeline as ctx + +_MATRIX_PENDING_ITEMS_KEY = "matrix_pending_items" + + +def _normalize_to_list(value: Any) -> List[Any]: + if value is None: + return [] + if isinstance(value, list): + return value + return [value] + + +def _extract_room_id(room_obj: Any) -> Optional[str]: + try: + # PipeObject stores unknown fields in .extra + if hasattr(room_obj, "extra"): + extra = getattr(room_obj, "extra") + if isinstance(extra, dict): + rid = extra.get("room_id") + if isinstance(rid, str) and rid.strip(): + return rid.strip() + # Dict fallback + if isinstance(room_obj, dict): + rid = room_obj.get("room_id") + if isinstance(rid, str) and rid.strip(): + return rid.strip() + except Exception: + pass + return None + + +def _extract_file_path(item: Any) -> Optional[str]: + 
"""Best-effort local file path extraction. + + Returns a filesystem path string only if it exists. + """ + def _maybe_local_path(value: Any) -> Optional[str]: + if value is None: + return None + if isinstance(value, Path): + candidate_path = value + else: + text = str(value).strip() + if not text: + return None + # Treat URLs as not-local. + if text.startswith("http://") or text.startswith("https://"): + return None + candidate_path = Path(text).expanduser() + try: + if candidate_path.exists(): + return str(candidate_path) + except Exception: + return None + return None + + try: + if hasattr(item, "path"): + found = _maybe_local_path(getattr(item, "path")) + if found: + return found + if hasattr(item, "file_path"): + found = _maybe_local_path(getattr(item, "file_path")) + if found: + return found + if isinstance(item, dict): + for key in ("path", "file_path", "target"): + found = _maybe_local_path(item.get(key)) + if found: + return found + except Exception: + pass + return None + + +def _extract_url(item: Any) -> Optional[str]: + try: + if hasattr(item, "url"): + raw = getattr(item, "url") + if isinstance(raw, str) and raw.strip(): + return raw.strip() + if isinstance(raw, (list, tuple)): + for v in raw: + if isinstance(v, str) and v.strip(): + return v.strip() + if hasattr(item, "source_url"): + raw = getattr(item, "source_url") + if isinstance(raw, str) and raw.strip(): + return raw.strip() + if isinstance(item, dict): + for key in ("url", "source_url", "path", "target"): + raw = item.get(key) + if isinstance(raw, str) and raw.strip() and raw.strip().startswith(("http://", "https://")): + return raw.strip() + except Exception: + pass + return None + + +_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$") + + +def _extract_sha256_hex(item: Any) -> Optional[str]: + try: + if hasattr(item, "hash"): + h = getattr(item, "hash") + if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()): + return h.strip().lower() + if isinstance(item, dict): + h = item.get("hash") + if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()): + return h.strip().lower() + except Exception: + pass + return None + + +def _extract_hash_from_hydrus_file_url(url: str) -> Optional[str]: + try: + parsed = urlparse(url) + if not (parsed.path or "").endswith("/get_files/file"): + return None + qs = parse_qs(parsed.query or "") + h = (qs.get("hash") or [None])[0] + if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()): + return h.strip().lower() + except Exception: + pass + return None + + +def _maybe_download_hydrus_file(item: Any, config: Dict[str, Any], output_dir: Path) -> Optional[str]: + """If the item looks like a Hydrus file (hash + Hydrus URL), download it using Hydrus access key headers. + + This avoids 401 from Hydrus when the URL is /get_files/file?hash=... without headers. + """ + try: + from config import get_hydrus_access_key, get_hydrus_url + from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file + + # Prefer per-item Hydrus instance name when it matches a configured instance. + store_name = None + if isinstance(item, dict): + store_name = item.get("store") + else: + store_name = getattr(item, "store", None) + store_name = str(store_name).strip() if store_name else "" + + # Try the store name as instance key first; fallback to "home". 
+ instance_candidates = [s for s in [store_name.lower(), "home"] if s] + hydrus_url = None + access_key = None + for inst in instance_candidates: + access_key = (get_hydrus_access_key(config, inst) or "").strip() or None + hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None + if access_key and hydrus_url: + break + + if not access_key or not hydrus_url: + return None + + url = _extract_url(item) + file_hash = _extract_sha256_hex(item) + if url and not file_hash: + file_hash = _extract_hash_from_hydrus_file_url(url) + + # If it doesn't look like a Hydrus file, skip. + if not file_hash: + return None + + # Only treat it as Hydrus when we have a matching /get_files/file URL OR the item store suggests it. + is_hydrus_url = False + if url: + parsed = urlparse(url) + is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash + hydrus_instances: set[str] = set() + try: + store_cfg = (config or {}).get("store") if isinstance(config, dict) else None + if isinstance(store_cfg, dict): + hydrus_cfg = store_cfg.get("hydrusnetwork") + if isinstance(hydrus_cfg, dict): + hydrus_instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()} + except Exception: + hydrus_instances = set() + + store_hint = store_name.lower() in {"hydrus", "hydrusnetwork"} or (store_name.lower() in hydrus_instances) + if not (is_hydrus_url or store_hint): + return None + + client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=30.0) + file_url = url if (url and is_hydrus_url) else client.file_url(file_hash) + + # Best-effort extension from Hydrus metadata. + suffix = ".hydrus" + try: + meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True) + entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None + if isinstance(entries, list) and entries: + entry = entries[0] + if isinstance(entry, dict): + ext = entry.get("ext") + if isinstance(ext, str) and ext.strip(): + cleaned = ext.strip() + if not cleaned.startswith("."): + cleaned = "." + cleaned.lstrip(".") + if len(cleaned) <= 12: + suffix = cleaned + except Exception: + pass + + output_dir.mkdir(parents=True, exist_ok=True) + dest = output_dir / f"{file_hash}{suffix}" + if dest.exists(): + # Avoid clobbering; pick a unique name. + dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}" + + headers = {"Hydrus-Client-API-Access-Key": access_key} + download_hydrus_file(file_url, headers, dest, timeout=30.0) + if dest.exists(): + return str(dest) + except Exception as exc: + debug(f"[matrix] Hydrus export failed: {exc}") + return None + + +def _maybe_unlock_alldebrid_url(url: str, config: Dict[str, Any]) -> str: + try: + parsed = urlparse(url) + host = (parsed.netloc or "").lower() + if host != "alldebrid.com": + return url + if not (parsed.path or "").startswith("/f/"): + return url + + try: + from Provider.alldebrid import _get_debrid_api_key # type: ignore + + api_key = _get_debrid_api_key(config or {}) + except Exception: + api_key = None + if not api_key: + return url + + from API.alldebrid import AllDebridClient + + client = AllDebridClient(str(api_key)) + unlocked = client.unlock_link(url) + if isinstance(unlocked, str) and unlocked.strip(): + return unlocked.strip() + except Exception: + pass + return url + + +def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]: + """Resolve a usable local file path for uploading. + + - Prefer existing local file paths. 
+ - Otherwise, if the item has an http(s) URL, download it to a temp directory. + """ + local = _extract_file_path(item) + if local: + return local + + # If this is a Hydrus-backed item (e.g. /get_files/file?hash=...), download it with Hydrus headers. + try: + base_tmp = None + if isinstance(config, dict): + base_tmp = config.get("temp") + output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina") + output_dir = output_dir / "matrix" / "hydrus" + hydrus_path = _maybe_download_hydrus_file(item, config, output_dir) + if hydrus_path: + return hydrus_path + except Exception: + pass + + url = _extract_url(item) + if not url: + return None + + # Best-effort: unlock AllDebrid file links (they require auth and aren't directly downloadable). + url = _maybe_unlock_alldebrid_url(url, config) + + try: + from SYS.download import _download_direct_file + + base_tmp = None + if isinstance(config, dict): + base_tmp = config.get("temp") + output_dir = Path(str(base_tmp)).expanduser() if base_tmp else (Path(tempfile.gettempdir()) / "Medios-Macina") + output_dir = output_dir / "matrix" + output_dir.mkdir(parents=True, exist_ok=True) + result = _download_direct_file(url, output_dir, quiet=True) + if result and hasattr(result, "path") and isinstance(result.path, Path) and result.path.exists(): + return str(result.path) + except Exception as exc: + debug(f"[matrix] Failed to download URL for upload: {exc}") + + return None + + def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # REFACTOR: Matrix cmdlet temporarily disabled during storage provider refactor - log("⚠️ Matrix cmdlet is temporarily disabled during refactor", file=sys.stderr) - return 1 + # Internal stage: send previously selected items to selected rooms. + if any(str(a).lower() == "-send" for a in (args or [])): + rooms = _normalize_to_list(result) + room_ids: List[str] = [] + for r in rooms: + rid = _extract_room_id(r) + if rid: + room_ids.append(rid) + if not room_ids: + log("No Matrix room selected (use @N on the rooms table)", file=sys.stderr) + return 1 + + pending_items = ctx.load_value(_MATRIX_PENDING_ITEMS_KEY, default=[]) + items = _normalize_to_list(pending_items) + if not items: + log("No pending items to upload (use: @N | .matrix)", file=sys.stderr) + return 1 + + from Provider.matrix import Matrix + try: + provider = Matrix(config) + except Exception as exc: + log(f"Matrix not available: {exc}", file=sys.stderr) + return 1 + + any_failed = False + for rid in room_ids: + for item in items: + file_path = _resolve_upload_path(item, config) + if not file_path: + any_failed = True + log("Matrix upload requires a local file (path) or a direct URL on the selected item", file=sys.stderr) + continue + try: + link = provider.upload_to_room(file_path, rid) + debug(f"✓ Sent {Path(file_path).name} -> {rid}") + if link: + log(link) + except Exception as exc: + any_failed = True + log(f"Matrix send failed for {Path(file_path).name}: {exc}", file=sys.stderr) + + # Clear pending items once we've attempted to send. + ctx.store_value(_MATRIX_PENDING_ITEMS_KEY, []) + return 1 if any_failed else 0 + + # Default stage: show rooms, then wait for @N selection to resume sending. 
+ selected_items = _normalize_to_list(result) + if not selected_items: + log("Usage: @N | .matrix (select items first, then pick a room)", file=sys.stderr) + return 1 + + ctx.store_value(_MATRIX_PENDING_ITEMS_KEY, selected_items) + + from Provider.matrix import Matrix + try: + provider = Matrix(config) + except Exception as exc: + log(f"Matrix not available: {exc}", file=sys.stderr) + return 1 + + try: + rooms = provider.list_rooms() + except Exception as exc: + log(f"Failed to list Matrix rooms: {exc}", file=sys.stderr) + return 1 + + if not rooms: + log("No joined rooms found.", file=sys.stderr) + return 0 + + table = ResultTable("Matrix Rooms") + table.set_table("matrix") + table.set_source_command(".matrix", []) + + for room in rooms: + row = table.add_row() + name = str(room.get("name") or "").strip() if isinstance(room, dict) else "" + room_id = str(room.get("room_id") or "").strip() if isinstance(room, dict) else "" + row.add_column("Name", name) + row.add_column("Room", room_id) + + # Make selection results clearer: stash a friendly title/store on the backing items. + # This avoids confusion when the selection handler prints PipeObject debug info. + room_items: List[Dict[str, Any]] = [] + for room in rooms: + if not isinstance(room, dict): + continue + room_id = str(room.get("room_id") or "").strip() + name = str(room.get("name") or "").strip() + room_items.append( + { + **room, + "store": "matrix", + "title": name or room_id or "Matrix Room", + } + ) + + # Overlay table: user selects @N, then we resume with `.matrix -send`. + ctx.set_last_result_table_overlay(table, room_items) + ctx.set_current_stage_table(table) + ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix") + + print() + print(table.format_plain()) + print("\nSelect room(s) with @N (e.g. 
@1 or @1-3) to send the selected item(s)") + return 0 CMDLET = Cmdlet( name=".matrix", alias=["matrix", "rooms"], - summary="List and select default Matrix room", - usage=".matrix [selection]", + summary="Send selected items to a Matrix room", + usage="@N | .matrix", arg=[ - CmdletArg( - name="selection", - type="string", - description="Index or ID of the room to set as default", - required=False - ) + CmdletArg(name="send", type="bool", description="(internal) Send to selected room(s)", required=False), ], exec=_run ) diff --git a/cmdnat/pipe.py b/cmdnat/pipe.py index 06dffef..e3c16e1 100644 --- a/cmdnat/pipe.py +++ b/cmdnat/pipe.py @@ -12,7 +12,6 @@ from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, se from result_table import ResultTable from MPV.mpv_ipc import MPV import pipeline as ctx -from SYS.download import is_url_supported_by_ytdlp from models import PipeObject from API.folder import LocalLibrarySearchOptimizer @@ -20,6 +19,78 @@ from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url from hydrus_health_check import get_cookies_file_path +_ALLDEBRID_UNLOCK_CACHE: Dict[str, str] = {} + + +def _get_alldebrid_api_key(config: Optional[Dict[str, Any]]) -> Optional[str]: + try: + if not isinstance(config, dict): + return None + provider_cfg = config.get("provider") + if not isinstance(provider_cfg, dict): + return None + ad_cfg = provider_cfg.get("alldebrid") + if not isinstance(ad_cfg, dict): + return None + key = ad_cfg.get("api_key") + if not isinstance(key, str): + return None + key = key.strip() + return key or None + except Exception: + return None + + +def _is_alldebrid_protected_url(url: str) -> bool: + try: + if not isinstance(url, str): + return False + u = url.strip() + if not u.startswith(("http://", "https://")): + return False + p = urlparse(u) + host = (p.netloc or "").lower() + path = p.path or "" + # AllDebrid file page links (require auth; not directly streamable by mpv) + return host == "alldebrid.com" and path.startswith("/f/") + except Exception: + return False + + +def _maybe_unlock_alldebrid_url(url: str, config: Optional[Dict[str, Any]]) -> str: + """Convert AllDebrid protected file URLs into direct streamable links. + + When AllDebrid returns `https://alldebrid.com/f/...`, that URL typically requires + authentication. MPV cannot access it without credentials. We transparently call + the AllDebrid API `link/unlock` (using the configured API key) to obtain a direct + URL that MPV can stream. + """ + if not _is_alldebrid_protected_url(url): + return url + + cached = _ALLDEBRID_UNLOCK_CACHE.get(url) + if isinstance(cached, str) and cached: + return cached + + api_key = _get_alldebrid_api_key(config) + if not api_key: + return url + + try: + from API.alldebrid import AllDebridClient + + client = AllDebridClient(api_key) + unlocked = client.unlock_link(url) + if isinstance(unlocked, str) and unlocked.strip(): + unlocked = unlocked.strip() + _ALLDEBRID_UNLOCK_CACHE[url] = unlocked + return unlocked + except Exception as e: + debug(f"AllDebrid unlock failed for MPV target: {e}", file=sys.stderr) + + return url + + def _ensure_lyric_overlay(mpv: MPV) -> None: try: mpv.ensure_lyric_loader_running() @@ -621,6 +692,13 @@ def _queue_items( target, title = result + # If the target is an AllDebrid protected file URL, unlock it to a direct link for MPV. 
+ try: + if isinstance(target, str): + target = _maybe_unlock_alldebrid_url(target, config) + except Exception: + pass + # Prefer per-item Hydrus instance credentials when the item belongs to a Hydrus store. effective_hydrus_url = hydrus_url effective_hydrus_header = hydrus_header @@ -665,21 +743,10 @@ def _queue_items( continue new_targets.add(norm_key) - # Check if it's a yt-dlp supported URL - is_ytdlp = False - # Treat any http(s) target as yt-dlp candidate. If the Python yt-dlp - # module is available we also check more deeply, but default to True - # so MPV can use its ytdl hooks for remote streaming sites. - is_hydrus_target = _is_hydrus_path(str(target), effective_hydrus_url) - try: - # Hydrus direct file URLs should not be treated as yt-dlp targets. - is_ytdlp = (not is_hydrus_target) and (target.startswith("http") or is_url_supported_by_ytdlp(target)) - except Exception: - is_ytdlp = (not is_hydrus_target) and target.startswith("http") - - # Use memory:// M3U hack to pass title to MPV - # Skip for yt-dlp url to ensure proper handling - if title and (is_hydrus_target or not is_ytdlp): + # Use memory:// M3U hack to pass title to MPV. + # This is especially important for remote URLs (e.g., YouTube) where MPV may otherwise + # show the raw URL as the playlist title. + if title: # Sanitize title for M3U (remove newlines) safe_title = title.replace('\n', ' ').replace('\r', '') @@ -703,8 +770,9 @@ def _queue_items( if clear_first and i == 0: mode = "replace" - # If this is a Hydrus path, set header property and yt-dlp headers before loading - if effective_hydrus_header and _is_hydrus_path(target_to_send, effective_hydrus_url): + # If this is a Hydrus path, set header property and yt-dlp headers before loading. + # Use the real target (not the memory:// wrapper) for detection. + if effective_hydrus_header and _is_hydrus_path(str(target), effective_hydrus_url): header_cmd = {"command": ["set_property", "http-header-fields", effective_hydrus_header], "request_id": 199} _send_ipc_command(header_cmd, silent=True) if effective_ytdl_opts: @@ -727,10 +795,8 @@ def _queue_items( _start_mpv(items[i:], config=config, start_opts=start_opts) return True elif resp.get("error") == "success": - # Also set property for good measure - if title: - title_cmd = {"command": ["set_property", "force-media-title", title], "request_id": 201} - _send_ipc_command(title_cmd) + # Do not set `force-media-title` when queueing items. It's a global property and + # would change the MPV window title even if the item isn't currently playing. debug(f"Queued: {title or target}") else: error_msg = str(resp.get('error')) @@ -1008,7 +1074,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ctx.set_last_result_table_overlay(table, [p['items'] for p in playlists]) ctx.set_current_stage_table(table) - print(table) + # In pipeline mode, the CLI renders current-stage tables; printing here duplicates output. + suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output")) + if not suppress_direct_print: + print(table) return 0 # Everything below was originally outside a try block; keep it inside so `start_opts` is in scope. @@ -1153,9 +1222,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: debug("MPV is starting up...") return 0 - debug("MPV is not running. Starting new instance...") - _start_mpv([], config=config, start_opts=start_opts) - return 0 + # IPC is ready; continue without restarting MPV again. 
+ else: + debug("MPV is not running. Starting new instance...") + _start_mpv([], config=config, start_opts=start_opts) + return 0 if not items: debug("MPV playlist is empty.") @@ -1314,7 +1385,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ctx.set_last_result_table_overlay(table, pipe_objects) ctx.set_current_stage_table(table) - print(table) + # In pipeline mode, the CLI renders current-stage tables; printing here duplicates output. + suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output")) + if not suppress_direct_print: + print(table) return 0 finally: diff --git a/config.py b/config.py index 656f2d9..1ee6a26 100644 --- a/config.py +++ b/config.py @@ -339,6 +339,26 @@ def get_hydrus_url(config: Dict[str, Any], instance_name: str = "home") -> Optio return str(url).strip() if url else None +def get_provider_block(config: Dict[str, Any], name: str) -> Dict[str, Any]: + provider_cfg = config.get("provider") + if not isinstance(provider_cfg, dict): + return {} + block = provider_cfg.get(str(name).strip().lower()) + return block if isinstance(block, dict) else {} + + +def get_soulseek_username(config: Dict[str, Any]) -> Optional[str]: + block = get_provider_block(config, "soulseek") + val = block.get("username") or block.get("USERNAME") + return str(val).strip() if val else None + + +def get_soulseek_password(config: Dict[str, Any]) -> Optional[str]: + block = get_provider_block(config, "soulseek") + val = block.get("password") or block.get("PASSWORD") + return str(val).strip() if val else None + + def resolve_output_dir(config: Dict[str, Any]) -> Path: """Resolve output directory from config with single source of truth. diff --git a/metadata.py b/metadata.py index 847c45a..ea53a14 100644 --- a/metadata.py +++ b/metadata.py @@ -1401,7 +1401,7 @@ def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]: assert HydrusRequestSpec is not None spec = HydrusRequestSpec( method='GET', - endpoint='/add_url/get_url_files', + endpoint='/add_urls/get_url_files', query={'url': candidate}, ) try: diff --git a/models.py b/models.py index 81a7b8c..478323a 100644 --- a/models.py +++ b/models.py @@ -80,6 +80,7 @@ class PipeObject: """ try: from SYS.logger import is_debug_enabled, debug + import shutil if not is_debug_enabled(): return @@ -87,24 +88,20 @@ class PipeObject: return # Prepare display values - hash_display = self.hash or "N/A" - store_display = self.store or "N/A" - title_display = self.title or "N/A" + hash_display = str(self.hash or "N/A") + store_display = str(self.store or "N/A") + title_display = str(self.title or "N/A") tag_display = ", ".join(self.tag[:3]) if self.tag else "[]" if len(self.tag) > 3: tag_display += f" (+{len(self.tag) - 3} more)" - file_path_display = self.path or "N/A" - if file_path_display != "N/A" and len(file_path_display) > 50: - file_path_display = "..." + file_path_display[-47:] - + file_path_display = str(self.path or "N/A") + url_display: Any = self.url or "N/A" if isinstance(url_display, (list, tuple, set)): parts = [str(x) for x in url_display if x] url_display = ", ".join(parts) if parts else "N/A" - elif url_display != "N/A": + else: url_display = str(url_display) - if url_display != "N/A" and len(url_display) > 48: - url_display = url_display[:45] + "..." 
relationships_display = "N/A" if self.relationships: @@ -117,24 +114,55 @@ class PipeObject: relationships_display = ", ".join(rel_parts) warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none" - + + def _fit(text: str, max_len: int) -> str: + if max_len <= 0: + return "" + if len(text) <= max_len: + return text + if max_len <= 3: + return text[:max_len] + return text[: max_len - 3] + "..." + + # Compute box width from terminal size, but never allow overflow. + try: + term_cols = int(getattr(shutil.get_terminal_size((120, 20)), "columns", 120)) + except Exception: + term_cols = 120 + box_inner_max = max(60, term_cols - 3) # line length = box_inner + 3 + + rows = [ + ("Hash", hash_display), + ("Store", store_display), + ("Title", title_display), + ("Tag", tag_display), + ("URL", str(url_display)), + ("File Path", file_path_display), + ("Relationships", relationships_display), + ("Warnings", warnings_display), + ] + label_width = max(len(k) for k, _ in rows) + + # Estimate a good inner width from current content, capped to terminal. + base_contents = [f"{k:<{label_width}} : {v}" for k, v in rows] + desired_inner = max([len("PipeObject Debug Info"), *[len(x) for x in base_contents], 60]) + box_inner = min(desired_inner, box_inner_max) + + def _line(content: str) -> str: + return f"│ {_fit(content, box_inner):<{box_inner}}│" + # Print table - debug("┌─────────────────────────────────────────────────────────────┐") - debug("│ PipeObject Debug Info │") - debug("├─────────────────────────────────────────────────────────────┤") - debug(f"│ Hash : {hash_display:<48}│") - debug(f"│ Store : {store_display:<48}│") - debug(f"│ Title : {title_display:<48}│") - debug(f"│ Tag : {tag_display:<48}│") - debug(f"│ URL : {url_display:<48}│") - debug(f"│ File Path : {file_path_display:<48}│") - debug(f"│ Relationships: {relationships_display:<47}│") - debug(f"│ Warnings : {warnings_display:<48}│") + debug("┌" + ("─" * (box_inner + 1)) + "┐") + debug(_line("PipeObject Debug Info")) + debug("├" + ("─" * (box_inner + 1)) + "┤") + for key, val in rows: + content = f"{key:<{label_width}} : {val}" + debug(_line(content)) # Show extra keys as individual rows if self.extra: - debug("├─────────────────────────────────────────────────────────────┤") - debug("│ Extra Fields: │") + debug("├" + ("─" * (box_inner + 1)) + "┤") + debug(_line("Extra Fields:")) for key, val in self.extra.items(): # Format value for display if isinstance(val, (list, set)): @@ -148,14 +176,16 @@ class PipeObject: val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..." # Truncate key if needed - key_display = key if len(key) <= 15 else key[:12] + "..." - debug(f"│ {key_display:<15}: {val_display:<42}│") + key_display = str(key) + key_display = key_display if len(key_display) <= 15 else key_display[:12] + "..." + content = f" {key_display:<15}: {val_display}" + debug(_line(content)) # If we have structured provider metadata, expand it for debugging. 
full_md = self.extra.get("full_metadata") if isinstance(full_md, dict) and full_md: - debug("├─────────────────────────────────────────────────────────────┤") - debug("│ full_metadata: │") + debug("├" + ("─" * (box_inner + 1)) + "┤") + debug(_line("full_metadata:")) for md_key in sorted(full_md.keys(), key=lambda x: str(x)): md_val = full_md.get(md_key) if isinstance(md_val, (str, int, float)) or md_val is None or isinstance(md_val, bool): @@ -176,9 +206,8 @@ class PipeObject: md_key_display = str(md_key) md_key_display = md_key_display if len(md_key_display) <= 15 else md_key_display[:12] + "..." - if len(md_display) > 42: - md_display = md_display[:39] + "..." - debug(f"│ {md_key_display:<15}: {md_display:<42}│") + content = f" {md_key_display:<15}: {md_display}" + debug(_line(content)) if self.action: debug("├─────────────────────────────────────────────────────────────┤") @@ -443,11 +472,6 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in return repr(value) - -# ============================================================================ -# PROGRESS BAR CLASS -# ============================================================================ - class ProgressBar: """Formats download progress with visual bar, speed, ETA, and file size.""" diff --git a/pyproject.toml b/pyproject.toml index 2a8e02c..d4e84d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "ffmpeg-python>=0.2.0", # Document and data handling - "PyPDF2>=3.0.0", + "pypdf>=3.0.0", "img2pdf>=0.6.0", "mutagen>=1.46.0", "cbor2>=4.0", diff --git a/requirements.txt b/requirements.txt index 02e8244..cd84ea6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ httpx>=0.25.0 ffmpeg-python>=0.2.0 # Document and data handling -PyPDF2>=3.0.0 +pypdf>=3.0.0 img2pdf>=0.6.0 mutagen>=1.46.0 cbor2>=4.0 diff --git a/result_table.py b/result_table.py index 9fd107a..e68f9a7 100644 --- a/result_table.py +++ b/result_table.py @@ -13,6 +13,7 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union, Callable, Tuple from pathlib import Path import json +import shutil # Optional Textual imports - graceful fallback if not available try: @@ -121,6 +122,17 @@ class ResultRow: """Add a column to this row.""" str_value = str(value) if value is not None else "" + # Tables are single-line per row: normalize hard line breaks inside cells + # so values (e.g., long descriptions) don't break the ASCII box shape. + if str_value: + str_value = ( + str_value + .replace("\r\n", " ") + .replace("\n", " ") + .replace("\r", " ") + .replace("\t", " ") + ) + # Normalize extension columns globally and cap to 5 characters if str(name).strip().lower() == "ext": str_value = str_value.strip().lstrip(".") @@ -717,6 +729,12 @@ class ResultTable: """ if not self.rows: return "No results" + + # Cap rendering to terminal width so long tables don't hard-wrap and + # visually break the border/shape. + term_width = shutil.get_terminal_size(fallback=(120, 24)).columns + if not term_width or term_width <= 0: + term_width = 120 # Calculate column widths col_widths: Dict[str, int] = {} @@ -739,7 +757,16 @@ class ResultTable: column_names = list(col_widths.keys()) def capped_width(name: str) -> int: - cap = 5 if name.lower() == "ext" else 90 + if name.lower() == "ext": + cap = 5 + else: + # Single-column tables (e.g., get-tag) can use more horizontal space, + # but still must stay within the terminal to avoid hard wrapping. 
+ if len(column_names) == 1: + # Keep room for side walls and optional row-number column. + cap = max(30, min(240, term_width - 6)) + else: + cap = 90 return min(col_widths[name], cap) widths = ([] if self.no_choice else [num_width]) + [capped_width(name) for name in column_names] @@ -752,6 +779,10 @@ class ResultTable: if self.header_lines: table_width = max(table_width, max(len(line) for line in self.header_lines) + 2) + # Ensure final render doesn't exceed terminal width (minus 1 safety column). + safe_term_width = max(20, term_width - 1) + table_width = min(table_width, safe_term_width) + def wrap(text: str) -> str: """Wrap content with side walls and pad to table width.""" if len(text) > table_width - 2: @@ -763,12 +794,26 @@ class ResultTable: # Title block if self.title: lines.append("|" + "=" * (table_width - 2) + "|") - lines.append(wrap(self.title.ljust(table_width - 2))) + safe_title = ( + str(self.title) + .replace("\r\n", " ") + .replace("\n", " ") + .replace("\r", " ") + .replace("\t", " ") + ) + lines.append(wrap(safe_title.ljust(table_width - 2))) lines.append("|" + "=" * (table_width - 2) + "|") # Optional header metadata lines for meta in self.header_lines: - lines.append(wrap(meta)) + safe_meta = ( + str(meta) + .replace("\r\n", " ") + .replace("\n", " ") + .replace("\r", " ") + .replace("\t", " ") + ) + lines.append(wrap(safe_meta)) # Add header with # column header_parts = [] if self.no_choice else ["#".ljust(num_width)] @@ -787,6 +832,14 @@ class ResultTable: for col_name in column_names: width = capped_width(col_name) col_value = row.get_column(col_name) or "" + if col_value: + col_value = ( + col_value + .replace("\r\n", " ") + .replace("\n", " ") + .replace("\r", " ") + .replace("\t", " ") + ) if len(col_value) > width: col_value = col_value[: width - 3] + "..." row_parts.append(col_value.ljust(width))