diff --git a/CLI.py b/CLI.py index 6a10274..5ed01c6 100644 --- a/CLI.py +++ b/CLI.py @@ -1710,6 +1710,51 @@ class PipelineExecutor: return False + @staticmethod + def _maybe_open_url_selection(current_table: Any, selected_items: list, *, stage_is_last: bool) -> bool: + if not stage_is_last: + return False + if not selected_items or len(selected_items) != 1: + return False + + table_type = "" + source_cmd = "" + try: + table_type = str(getattr(current_table, "table", "") or "").strip().lower() + except Exception: + table_type = "" + try: + source_cmd = str(getattr(current_table, "source_command", "") or "").strip().replace("_", "-").lower() + except Exception: + source_cmd = "" + + if table_type != "url" and source_cmd != "get-url": + return False + + item = selected_items[0] + url = None + try: + from cmdlet._shared import get_field + + url = get_field(item, "url") + except Exception: + try: + url = item.get("url") if isinstance(item, dict) else getattr(item, "url", None) + except Exception: + url = None + + url_text = str(url or "").strip() + if not url_text: + return False + + try: + import webbrowser + + webbrowser.open(url_text, new=2) + return True + except Exception: + return False + def _maybe_enable_background_notifier(self, worker_manager: Any, config: Any, pipeline_session: Any) -> None: if not (pipeline_session and worker_manager and isinstance(config, dict)): return @@ -1798,12 +1843,15 @@ class PipelineExecutor: else: selected_row_args: List[str] = [] skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0 - if source_cmd and not skip_pipe_expansion: - for idx in selection_indices: - row_args = ctx.get_current_stage_table_row_selection_args(idx) - if row_args: - selected_row_args.extend(row_args) - break + # Only perform @N command expansion for *single-item* selections. + # For multi-item selections (e.g. @*, @1-5), expanding to a single + # row would silently drop items. In those cases we pipe the selected + # items downstream instead. + if source_cmd and not skip_pipe_expansion and len(selection_indices) == 1: + idx = selection_indices[0] + row_args = ctx.get_current_stage_table_row_selection_args(idx) + if row_args: + selected_row_args.extend(row_args) if selected_row_args: if isinstance(source_cmd, list): @@ -1834,30 +1882,42 @@ class PipelineExecutor: command_expanded = True if (not command_expanded) and selection_indices: - last_piped_items = None - try: - last_piped_items = ctx.get_last_result_items() - except Exception: - last_piped_items = None - stage_table = None try: stage_table = ctx.get_current_stage_table() except Exception: stage_table = None - if not stage_table and hasattr(ctx, "get_display_table"): - try: - stage_table = ctx.get_display_table() - except Exception: - stage_table = None + + display_table = None + try: + display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None + except Exception: + display_table = None + + if not stage_table and display_table is not None: + stage_table = display_table if not stage_table: try: stage_table = ctx.get_last_result_table() except Exception: stage_table = None - resolved_items = last_piped_items if last_piped_items else [] - if last_piped_items: + # Prefer selecting from the last selectable *table* (search/playlist) + # rather than from display-only emitted items, unless we're explicitly + # selecting from an overlay table. 
+ try: + if display_table is not None and stage_table is display_table: + items_list = ctx.get_last_result_items() or [] + else: + if hasattr(ctx, "get_last_selectable_result_items"): + items_list = ctx.get_last_selectable_result_items() or [] + else: + items_list = ctx.get_last_result_items() or [] + except Exception: + items_list = [] + + resolved_items = items_list if items_list else [] + if items_list: filtered = [resolved_items[i] for i in selection_indices if 0 <= i < len(resolved_items)] if not filtered: print("No items matched selection in pipeline\n") @@ -2003,6 +2063,14 @@ class PipelineExecutor: try: self._try_clear_pipeline_stop(ctx) + + # Preflight (URL-duplicate prompts, etc.) should be cached within a single + # pipeline run, not across independent pipelines. + try: + ctx.store_value("preflight", {}) + except Exception: + pass + stages = self._split_stages(tokens) if not stages: print("Invalid pipeline syntax\n") @@ -2066,11 +2134,39 @@ class PipelineExecutor: stage_args = stage_tokens[1:] if cmd_name == "@": + # Prefer piping the last emitted/visible items (e.g. add-file results) + # over the result-table subject. The subject can refer to older context + # (e.g. a playlist row) and may not contain store+hash. + last_items = None + try: + last_items = ctx.get_last_result_items() + except Exception: + last_items = None + + if last_items: + from cmdlet._shared import coerce_to_pipe_object + + try: + pipe_items = [coerce_to_pipe_object(x) for x in list(last_items)] + except Exception: + pipe_items = list(last_items) + piped_result = pipe_items if len(pipe_items) > 1 else pipe_items[0] + try: + ctx.set_last_items(pipe_items) + except Exception: + pass + if pipeline_session and worker_manager: + try: + worker_manager.log_step(pipeline_session.worker_id, "@ used last result items") + except Exception: + pass + continue + subject = ctx.get_last_result_subject() if subject is None: print("No current result context available for '@'\n") pipeline_status = "failed" - pipeline_error = "No result subject for @" + pipeline_error = "No result items/subject for @" return piped_result = subject try: @@ -2095,18 +2191,34 @@ class PipelineExecutor: return selected_indices = [] + # Prefer selecting from the last selectable *table* (search/playlist) + # rather than from display-only emitted items, unless we're explicitly + # selecting from an overlay table. 
+ display_table = None + try: + display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None + except Exception: + display_table = None + + stage_table = ctx.get_current_stage_table() + if not stage_table and display_table is not None: + stage_table = display_table + if not stage_table: + stage_table = ctx.get_last_result_table() + + if display_table is not None and stage_table is display_table: + items_list = ctx.get_last_result_items() or [] + else: + if hasattr(ctx, "get_last_selectable_result_items"): + items_list = ctx.get_last_selectable_result_items() or [] + else: + items_list = ctx.get_last_result_items() or [] + if is_select_all: - last_items = ctx.get_last_result_items() or [] - selected_indices = list(range(len(last_items))) + selected_indices = list(range(len(items_list))) else: selected_indices = sorted([i - 1 for i in selection]) # type: ignore[arg-type] - stage_table = ctx.get_current_stage_table() - if not stage_table and hasattr(ctx, "get_display_table"): - stage_table = ctx.get_display_table() - if not stage_table: - stage_table = ctx.get_last_result_table() - items_list = ctx.get_last_result_items() or [] resolved_items = items_list if items_list else [] filtered = [resolved_items[i] for i in selected_indices if 0 <= i < len(resolved_items)] if not filtered: @@ -2115,6 +2227,20 @@ class PipelineExecutor: pipeline_error = "Empty selection" return + # UX: selecting a single URL row from get-url tables should open it. + # Only do this when the selection stage is terminal to avoid surprising + # side-effects in pipelines like `@1 | download-file`. + current_table = ctx.get_current_stage_table() or ctx.get_last_result_table() + if (not is_select_all) and (len(filtered) == 1): + try: + PipelineExecutor._maybe_open_url_selection( + current_table, + filtered, + stage_is_last=(stage_index + 1 >= len(stages)), + ) + except Exception: + pass + if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(stage_index + 1 >= len(stages))): return @@ -2366,12 +2492,19 @@ class PipelineExecutor: # the table and pause the pipeline so the user can pick @N. stage_table = ctx.get_current_stage_table() if hasattr(ctx, "get_current_stage_table") else None stage_table_type = str(getattr(stage_table, "table", "") or "").strip().lower() if stage_table else "" + try: + stage_table_source = str(getattr(stage_table, "source_command", "") or "").strip().replace("_", "-").lower() if stage_table else "" + except Exception: + stage_table_source = "" if ( (not stage_is_last) and (not emits) and cmd_name in {"download-media", "download_media"} and stage_table is not None - and stage_table_type in {"ytdlp.formatlist", "download-media", "download_media"} + and ( + stage_table_type in {"ytdlp.formatlist", "download-media", "download_media", "bandcamp", "youtube"} + or stage_table_source in {"download-media", "download_media"} + ) ): try: is_selectable = not bool(getattr(stage_table, "no_choice", False)) @@ -2407,6 +2540,10 @@ class PipelineExecutor: stdout_console().print() stdout_console().print(stage_table) + # Always pause the pipeline when a selectable table was produced. + # The user will continue by running @N/@* which will re-attach the + # pending downstream stages. 
+            try:
+                remaining = stages[stage_index + 1 :]
+                source_cmd = (
diff --git a/Provider/libgen.py b/Provider/libgen.py
index 214f51a..c1d3983 100644
--- a/Provider/libgen.py
+++ b/Provider/libgen.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import html as html_std
 import logging
 import re
 import requests
@@ -22,6 +23,595 @@
 except ImportError:
     lxml_html = None
 
+
+def _strip_html_to_text(raw: str) -> str:
+    s = html_std.unescape(str(raw or ""))
+    s = re.sub(r"(?i)<br\s*/?>", "\n", s)
+    # Help keep lists readable when they are link-heavy.
+    s = re.sub(r"(?i)</a>", ", ", s)
+    s = re.sub(r"<[^>]+>", " ", s)
+    s = re.sub(r"\s+", " ", s)
+    return s.strip()
+
+
+def _strip_html_to_lines(raw: str) -> List[str]:
+    """Convert a small HTML snippet to a list of meaningful text lines.
+
+    Unlike `_strip_html_to_text`, this preserves `<br>` as line breaks so we can
+    parse LibGen ads.php tag blocks that use `<br>` separators.
+    """
+
+    s = html_std.unescape(str(raw or ""))
+    # Drop script/style blocks entirely, then map structural breaks to newlines.
+    s = re.sub(r"(?is)<script\b.*?</script>", " ", s)
+    s = re.sub(r"(?is)<style\b.*?</style>", " ", s)
+    s = re.sub(r"(?i)<br\s*/?>", "\n", s)
+    s = re.sub(r"(?i)</p>", "\n", s)
+    s = re.sub(r"(?i)</div>", "\n", s)
+    # Help keep link-heavy lists readable.
+    s = re.sub(r"(?i)</a>", ", ", s)
+    s = re.sub(r"<[^>]+>", " ", s)
+    s = s.replace("\r\n", "\n").replace("\r", "\n")
+
+    out: List[str] = []
+    for line in s.split("\n"):
+        t = re.sub(r"\s+", " ", str(line or "")).strip()
+        if t:
+            out.append(t)
+    return out
+
+
+def _libgen_md5_from_url(url: str) -> str:
+    try:
+        p = urlparse(str(url or ""))
+        q = p.query or ""
+    except Exception:
+        q = ""
+    m = re.search(r"(?:^|[&?])md5=([a-fA-F0-9]{32})(?:&|$)", q)
+    return str(m.group(1)).lower() if m else ""
+
+
+def _libgen_ads_url_for_target(url: str) -> str:
+    """Best-effort conversion of any LibGen URL to an ads.php URL (same host).
+
+    If md5 is not present, returns empty string.
+    """
+
+    md5 = _libgen_md5_from_url(url)
+    if not md5:
+        return ""
+    try:
+        p = urlparse(str(url or ""))
+        scheme = p.scheme or "https"
+        netloc = p.netloc
+        if not netloc:
+            return ""
+        return f"{scheme}://{netloc}/ads.php?md5={md5}"
+    except Exception:
+        return ""
+
+
+def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
+    """Parse tags embedded on LibGen ads.php pages.
+
+    Some mirrors render all metadata as a single `<td>` with `<br>` separators:
+        title: ...<br>author(s): ...<br>isbn: ...
+
+    Returns a metadata dict similar to `_parse_libgen_details_html` (subset), plus
+    `_raw_fields` with captured keys.
+    """
+
+    s = str(html or "")
+    td_blocks = re.findall(r"(?is)<td[^>]*>(.*?)</td>", s)
+
+    best_lines: List[str] = []
+    best_score = 0
+    for td in td_blocks:
+        lines = _strip_html_to_lines(td)
+        if not lines:
+            continue
+        score = 0
+        for ln in lines:
+            lo = ln.lower()
+            if ":" in ln and any(k in lo for k in ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")):
+                score += 1
+        if score > best_score:
+            best_score = score
+            best_lines = lines
+
+    # Fallback: treat the entire page as a line list.
+    if not best_lines:
+        best_lines = _strip_html_to_lines(s)
+
+    raw_fields: Dict[str, str] = {}
+    pending_key: Optional[str] = None
+
+    def _norm_key(k: str) -> str:
+        kk = str(k or "").strip().lower()
+        kk = re.sub(r"\s+", " ", kk)
+        if kk in {"authors", "author(s)", "author(s).", "author(s):"}:
+            return "author"
+        if kk in {"tag", "tags"}:
+            return "tags"
+        return kk
+
+    for ln in best_lines:
+        line = str(ln or "").strip()
+        if not line:
+            continue
+
+        if ":" in line:
+            k, v = line.split(":", 1)
+            k = _norm_key(k)
+            v = str(v or "").strip()
+            if v:
+                raw_fields[k] = v
+                pending_key = None
+            else:
+                pending_key = k
+            continue
+
+        # Continuation line: if the previous key had no inline value, use this.
+        if pending_key:
+            raw_fields[pending_key] = line
+            pending_key = None
+
+    out: Dict[str, Any] = {"_raw_fields": dict(raw_fields)}
+
+    title = str(raw_fields.get("title") or "").strip()
+    if title:
+        out["title"] = title
+
+    publisher = str(raw_fields.get("publisher") or "").strip()
+    if publisher:
+        out["publisher"] = publisher
+
+    year = str(raw_fields.get("year") or "").strip()
+    if year:
+        out["year"] = year
+
+    language = str(raw_fields.get("language") or "").strip()
+    if language:
+        out["language"] = language
+
+    authors_raw = str(raw_fields.get("author") or "").strip()
+    if authors_raw:
+        out["authors"] = _split_listish_text(authors_raw)
+
+    # ISBN: extract all tokens (some pages include multiple).
+    isbn_raw = str(raw_fields.get("isbn") or "").strip()
+    if isbn_raw:
+        isbns = _extract_isbns(isbn_raw)
+        if isbns:
+            out["isbn"] = isbns
+
+    tags_raw = str(raw_fields.get("tags") or "").strip()
+    if tags_raw:
+        # Keep these as freeform tags (split on commas/semicolons/pipes).
+        out["tags"] = _split_listish_text(tags_raw)
+
+    return out
+
+
+def _extract_anchor_texts(raw_html: str) -> List[str]:
+    out: List[str] = []
+    for m in re.finditer(r"(?is)<a\b[^>]*>(.*?)</a>", str(raw_html or "")):
+        t = _strip_html_to_text(m.group(1))
+        if t:
+            out.append(t)
+    # De-dupe, preserve order
+    seen: set[str] = set()
+    uniq: List[str] = []
+    for x in out:
+        k = x.strip()
+        if not k:
+            continue
+        if k.lower() in seen:
+            continue
+        seen.add(k.lower())
+        uniq.append(k)
+    return uniq
+
+
+def _split_listish_text(value: str) -> List[str]:
+    s = str(value or "").strip()
+    if not s:
+        return []
+    parts = re.split(r"\s*(?:,|;|\|)\s*", s)
+    out: List[str] = []
+    for p in parts:
+        p = str(p or "").strip()
+        if p:
+            out.append(p)
+    return out
+
+
+def _extract_isbns(text: str) -> List[str]:
+    s = str(text or "")
+    candidates = re.findall(r"\b[0-9Xx][0-9Xx\-\s]{8,20}[0-9Xx]\b", s)
+    out: List[str] = []
+    for c in candidates:
+        n = re.sub(r"[^0-9Xx]", "", c).upper()
+        if len(n) not in (10, 13):
+            continue
+        if n not in out:
+            out.append(n)
+    # Also handle already-clean tokens.
+ for c in re.findall(r"\b(?:97[89])?\d{9}[\dXx]\b", s): + n = str(c).upper() + if n not in out: + out.append(n) + return out + + +def _libgen_id_from_url(url: str) -> str: + # Handles edition.php?id=..., file.php?id=... + m = re.search(r"(?:\?|&)id=(\d+)", str(url or ""), flags=re.IGNORECASE) + return str(m.group(1)) if m else "" + + +def _prefer_isbn(isbns: List[str]) -> str: + vals = [str(x or "").strip() for x in (isbns or []) if str(x or "").strip()] + # Prefer ISBN-13, then ISBN-10. + for v in vals: + if len(v) == 13: + return v + for v in vals: + if len(v) == 10: + return v + return vals[0] if vals else "" + + +def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] = None) -> Tuple[List[str], str]: + """Return (tags, source_name) for the given ISBN. + + Priority: + 1) OpenLibrary API-by-ISBN scrape (fast, structured) + 2) isbnsearch.org scrape via MetadataProvider + """ + + isbn_clean = re.sub(r"[^0-9Xx]", "", str(isbn or "")).upper() + if len(isbn_clean) not in (10, 13): + return [], "" + + # 1) OpenLibrary API lookup by ISBN (short timeout, silent failure). + try: + url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json" + resp = requests.get(url, timeout=4) + resp.raise_for_status() + data = resp.json() + if isinstance(data, dict) and data: + book_data = next(iter(data.values()), None) + else: + book_data = None + + if isinstance(book_data, dict): + tags: List[str] = [] + + def _add(t: str) -> None: + s = str(t or "").strip() + if s: + tags.append(s) + + if book_data.get("title"): + _add(f"title:{book_data['title']}") + + authors = book_data.get("authors") + if isinstance(authors, list): + for a in authors[:3]: + if isinstance(a, dict) and a.get("name"): + _add(f"author:{a['name']}") + + if book_data.get("publish_date"): + _add(f"publish_date:{book_data['publish_date']}") + + publishers = book_data.get("publishers") + if isinstance(publishers, list) and publishers: + pub0 = publishers[0] + if isinstance(pub0, dict) and pub0.get("name"): + _add(f"publisher:{pub0['name']}") + + desc = book_data.get("description") + if isinstance(desc, dict) and "value" in desc: + desc = desc.get("value") + if desc: + desc_str = str(desc).strip() + if desc_str: + _add(f"description:{desc_str[:200]}") + + pages = book_data.get("number_of_pages") + if isinstance(pages, int) and pages > 0: + _add(f"pages:{pages}") + + identifiers = book_data.get("identifiers") + if isinstance(identifiers, dict): + + def _first(value: Any) -> Any: + if isinstance(value, list) and value: + return value[0] + return value + + for key, ns in ( + ("openlibrary", "openlibrary"), + ("lccn", "lccn"), + ("oclc", "oclc"), + ("goodreads", "goodreads"), + ("librarything", "librarything"), + ("doi", "doi"), + ("internet_archive", "internet_archive"), + ): + val = _first(identifiers.get(key)) + if val: + _add(f"{ns}:{val}") + + if not any(str(t).lower().startswith("isbn:") for t in tags): + tags.insert(0, f"isbn:{isbn_clean}") + + # De-dupe case-insensitively, preserve order. + seen: set[str] = set() + out: List[str] = [] + for t in tags: + k = str(t).strip().lower() + if not k or k in seen: + continue + seen.add(k) + out.append(str(t).strip()) + + if out: + return out, "openlibrary" + except Exception: + pass + + # 2) isbnsearch metadata provider fallback. 
+    try:
+        from Provider.metadata_provider import get_metadata_provider
+
+        provider = get_metadata_provider("isbnsearch", config or {})
+        if provider is None:
+            return [], ""
+        items = provider.search(isbn_clean, limit=1)
+        if not items:
+            return [], ""
+        tags = provider.to_tags(items[0])
+        if not any(str(t).lower().startswith("isbn:") for t in tags):
+            tags = [f"isbn:{isbn_clean}"] + [str(t) for t in tags]
+        return [str(t) for t in tags if str(t).strip()], provider.name
+    except Exception:
+        return [], ""
+
+
+def _fetch_libgen_details_html(url: str, *, timeout: Optional[Tuple[float, float]] = None) -> Optional[str]:
+    try:
+        if timeout is None:
+            timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
+        session = requests.Session()
+        session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
+            }
+        )
+        with session.get(str(url), stream=True, timeout=timeout) as resp:
+            resp.raise_for_status()
+            ct = str(resp.headers.get("Content-Type", "")).lower()
+            if "text/html" not in ct:
+                return None
+            return resp.text
+    except Exception:
+        return None
+
+
+def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
+    """Parse LibGen details-page HTML (edition.php/file.php) into a metadata dict.
+
+    Best-effort and intentionally tolerant of mirror variations.
+    """
+
+    out: Dict[str, Any] = {}
+    raw_fields: Dict[str, str] = {}
+    s = str(html or "")
+
+    # Fast path: try to pull simple Label/Value table rows.
+    for m in re.finditer(
+        r"(?is)<tr[^>]*>\s*<td[^>]*>\s*([^<]{1,80}?)\s*:?</b>?\s*</td>\s*<td[^>]*>(.*?)</td>\s*</tr>",
+        s,
+    ):
+        label = _strip_html_to_text(m.group(1))
+        raw_val_html = str(m.group(2) or "")
+        if not label:
+            continue
+        val_text = _strip_html_to_text(raw_val_html)
+        if not val_text:
+            continue
+        raw_fields[label] = val_text
+
+        norm = re.sub(r"[^a-z0-9]+", "_", label.strip().lower()).strip("_")
+        if not norm:
+            continue
+
+        # Prefer anchors for multi-valued fields.
+        anchors = _extract_anchor_texts(raw_val_html)
+        if anchors:
+            out[norm] = anchors
+        else:
+            out[norm] = val_text
+
+    # Some libgen.gl edition pages group metadata as repeated blocks like:
+    #   <strong>Title:</strong>
+    #   The Title
+    # We'll parse those too (best-effort, no DOM required).
+    strong_matches = list(re.finditer(r"(?is)<strong[^>]*>(.*?)</strong>", s))
+    if strong_matches:
+        for idx, m in enumerate(strong_matches):
+            label_raw = _strip_html_to_text(m.group(1))
+            label = str(label_raw or "").strip()
+            if not label:
+                continue
+
+            # Normalize label (strip trailing colon if present).
+            if label.endswith(":"):
+                label = label[:-1].strip()
+
+            chunk_start = m.end()
+            chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
+            raw_val_html = s[chunk_start:chunk_end]
+
+            # If we already have a value for this label from a table row, keep it.
+            if label in raw_fields:
+                continue
+
+            val_text = _strip_html_to_text(raw_val_html)
+            if not val_text:
+                continue
+
+            raw_fields[label] = val_text
+
+            norm = re.sub(r"[^a-z0-9]+", "_", label.strip().lower()).strip("_")
+            if not norm:
+                continue
+
+            anchors = _extract_anchor_texts(raw_val_html)
+            if anchors:
+                out[norm] = anchors
+            else:
+                out[norm] = val_text
+
+    # Normalize keys of interest.
+ def _first_str(v: Any) -> str: + if isinstance(v, list) and v: + return str(v[0] or "").strip() + return str(v or "").strip() + + title = _first_str(out.get("title")) + if title: + out["title"] = title + + authors = out.get("author_s") or out.get("authors") or out.get("author") + if isinstance(authors, str): + authors_list = _split_listish_text(authors) + elif isinstance(authors, list): + authors_list = [str(x).strip() for x in authors if str(x).strip()] + else: + authors_list = [] + if authors_list: + out["authors"] = authors_list + + publisher = _first_str(out.get("publisher")) + if publisher: + out["publisher"] = publisher + + year = _first_str(out.get("year")) + if year: + out["year"] = year + + language = _first_str(out.get("language")) + if language: + out["language"] = language + + oclc = _first_str(out.get("oclc_worldcat")) or _first_str(out.get("oclc")) + if oclc: + m_oclc = re.search(r"\b\d{5,15}\b", oclc) + out["oclc"] = str(m_oclc.group(0)) if m_oclc else oclc + + tags_val = out.get("tags") + if isinstance(tags_val, list): + tags_list = [str(x).strip() for x in tags_val if str(x).strip()] + elif isinstance(tags_val, str): + tags_list = _split_listish_text(tags_val) + else: + tags_list = [] + if tags_list: + out["tags"] = tags_list + + isbn_val = out.get("isbn") + isbn_text = "" + if isinstance(isbn_val, list): + isbn_text = " ".join([str(x) for x in isbn_val if x]) + else: + isbn_text = str(isbn_val or "") + isbns = _extract_isbns(isbn_text) + if isbns: + out["isbn"] = isbns + + edition_id = _first_str(out.get("edition_id")) + if edition_id: + m_eid = re.search(r"\b\d+\b", edition_id) + out["edition_id"] = str(m_eid.group(0)) if m_eid else edition_id + + if raw_fields: + out["_raw_fields"] = raw_fields + + return out + + +def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]: + tags: List[str] = [] + seen: set[str] = set() + + def _add(t: str) -> None: + s = str(t or "").strip() + if not s: + return + k = s.lower() + if k in seen: + return + seen.add(k) + tags.append(s) + + title = str(meta.get("title") or "").strip() + if title: + _add(f"title:{title}") + + for a in meta.get("authors") or []: + a = str(a or "").strip() + if a: + _add(f"author:{a}") + + publisher = str(meta.get("publisher") or "").strip() + if publisher: + _add(f"publisher:{publisher}") + + year = str(meta.get("year") or "").strip() + if year: + _add(f"year:{year}") + + language = str(meta.get("language") or "").strip() + if language: + _add(f"language:{language}") + + for isbn in meta.get("isbn") or []: + isbn = str(isbn or "").strip().replace("-", "") + if isbn: + _add(f"isbn:{isbn}") + + oclc = str(meta.get("oclc") or "").strip() + if oclc: + _add(f"oclc:{oclc}") + + edition_id = str(meta.get("edition_id") or "").strip() + if edition_id: + _add(f"libgen_edition_id:{edition_id}") + + # Freeform tags (no "tags:" prefix). + for t in meta.get("tags") or []: + t = str(t or "").strip() + if t: + _add(t) + + # Any additional structured fields we captured are preserved under a libgen_ namespace. 
+ raw_fields = meta.get("_raw_fields") + if isinstance(raw_fields, dict): + for k, v in raw_fields.items(): + lk = str(k or "").strip().lower() + if lk in {"title", "author(s)", "authors", "author", "publisher", "year", "isbn", "language", "oclc/worldcat", "tags", "edition id"}: + continue + vv = str(v or "").strip() + if not vv: + continue + ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_") + if ns: + _add(f"libgen_{ns}:{vv}") + + return tags + + class Libgen(Provider): # Domains that should be routed to this provider when the user supplies a URL. # (Used by ProviderCore.registry.match_provider_name_for_url) @@ -146,6 +736,15 @@ class Libgen(Provider): md5 = str(md.get("md5") or "").strip() extension = str(md.get("extension") or "").strip().lstrip(".") + # If the user passed ads.php/get.php directly, capture md5 from the URL so + # filenames are stable (avoid always writing `libgen.pdf`). + if (not md5) and isinstance(target, str) and target.startswith("http"): + md5 = _libgen_md5_from_url(target) + if md5: + md["md5"] = md5 + + # Defer LibGen details-page metadata and ISBN enrichment until AFTER the file is downloaded. + if (not target) or target.startswith("libgen:"): if md5 and re.fullmatch(r"[a-fA-F0-9]{32}", md5): target = urljoin(MIRRORS[0], f"/ads.php?md5={md5}") @@ -153,7 +752,10 @@ class Libgen(Provider): if not target: return None - base_name = sanitize_filename(title or md5 or "libgen") + if title and title.startswith("http"): + title = "" + + base_name = sanitize_filename(title or md5 or (f"libgen_{_libgen_id_from_url(target)}" if _libgen_id_from_url(target) else "libgen")) out_path = output_dir / base_name if extension: out_path = out_path.with_suffix(f".{extension}") @@ -190,6 +792,122 @@ class Libgen(Provider): ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback) progress_bar.finish() if ok and final_path: + # After the download completes, best-effort fetch details metadata (title + ISBN) + # and then enrich tags via OpenLibrary/isbnsearch. This ensures enrichment never + # blocks the download itself. + try: + if isinstance(target, str) and target.startswith("http"): + low = target.lower() + # Preferred: ads.php pages often embed a complete tag block. + # Parse it post-download (best-effort) and do NOT perform external + # enrichment (OpenLibrary/isbnsearch) unless the user later chooses to. 
+ if ("/ads.php" in low) or ("/get.php" in low): + ads_url = target if "/ads.php" in low else _libgen_ads_url_for_target(target) + if ads_url: + html = _fetch_libgen_details_html(ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)) + if html: + meta = _parse_libgen_ads_tags_html(html) + extracted_title = str(meta.get("title") or "").strip() + if extracted_title: + md["title"] = extracted_title + result.tag.add(f"title:{extracted_title}") + if (not title) or title.startswith("http"): + title = extracted_title + + authors = meta.get("authors") if isinstance(meta.get("authors"), list) else [] + for a in (authors or []): + aa = str(a or "").strip() + if aa: + result.tag.add(f"author:{aa}") + + publisher = str(meta.get("publisher") or "").strip() + if publisher: + md["publisher"] = publisher + result.tag.add(f"publisher:{publisher}") + + year = str(meta.get("year") or "").strip() + if year: + md["year"] = year + result.tag.add(f"year:{year}") + + language = str(meta.get("language") or "").strip() + if language: + md["language"] = language + result.tag.add(f"language:{language}") + + isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else [] + isbns = [str(x).strip() for x in (isbns or []) if str(x).strip()] + if isbns: + md["isbn"] = isbns + for isbn_val in isbns: + result.tag.add(f"isbn:{isbn_val}") + + free_tags = meta.get("tags") if isinstance(meta.get("tags"), list) else [] + for t in (free_tags or []): + tt = str(t or "").strip() + if tt: + result.tag.add(tt) + + # Preserve any other extracted fields (namespaced). + raw_fields = meta.get("_raw_fields") + if isinstance(raw_fields, dict): + for k, v in raw_fields.items(): + lk = str(k or "").strip().lower() + if lk in {"title", "author", "authors", "publisher", "year", "isbn", "language", "tags"}: + continue + vv = str(v or "").strip() + if not vv: + continue + ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_") + if ns: + result.tag.add(f"libgen_{ns}:{vv}") + + # Legacy: edition/file/series details pages (title + ISBN) + external enrichment. 
+ if ("/edition.php" in low) or ("/file.php" in low) or ("/series.php" in low): + html = _fetch_libgen_details_html(target) + if html: + meta = _parse_libgen_details_html(html) + + if not meta.get("edition_id"): + eid = _libgen_id_from_url(target) + if eid: + meta["edition_id"] = eid + + extracted_title = str(meta.get("title") or "").strip() + extracted_isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else [] + extracted_isbns = [str(x).strip() for x in (extracted_isbns or []) if str(x).strip()] + + if extracted_title: + md["title"] = extracted_title + result.tag.add(f"title:{extracted_title}") + if extracted_isbns: + md["isbn"] = extracted_isbns + for isbn_val in extracted_isbns: + isbn_norm = str(isbn_val).strip().replace("-", "") + if isbn_norm: + result.tag.add(f"isbn:{isbn_norm}") + if meta.get("edition_id"): + md["edition_id"] = str(meta.get("edition_id")) + + preferred_isbn = _prefer_isbn(extracted_isbns) + if preferred_isbn: + enriched_tags, enriched_source = _enrich_book_tags_from_isbn( + preferred_isbn, + config=getattr(self, "config", None), + ) + if enriched_tags: + try: + result.tag.update(set(enriched_tags)) + except Exception: + pass + if enriched_source: + md["metadata_enriched_from"] = enriched_source + + if extracted_title and ((not title) or title.startswith("http")): + title = extracted_title + except Exception: + pass + return Path(final_path) return None except Exception: @@ -751,6 +1469,34 @@ def _resolve_download_url( if not html: return None + # LibGen chain helpers (for environments without lxml). + # Typical chain: + # edition.php?id=... -> file.php?id=... + # file.php?id=... -> ads.php?md5=... (or get.php?md5=...) + # ads.php?md5=... -> get.php?md5=... + # get.php?md5=... -> file response + + # Handle edition -> file links. + m = re.search(r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE) + if m: + href = str(m.group(1) or "").strip() + if href and not href.lower().startswith("javascript:"): + return urljoin(base_url, href) + + # Handle series -> edition links. + m = re.search(r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE) + if m: + href = str(m.group(1) or "").strip() + if href and not href.lower().startswith("javascript:"): + return urljoin(base_url, href) + + # Handle file -> ads/get links (sometimes present as the "Libgen" mirror). + m = re.search(r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE) + if m: + href = str(m.group(1) or "").strip() + if href and not href.lower().startswith("javascript:"): + return urljoin(base_url, href) + # Prefer explicit get.php md5 links (most common successful chain). m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE) if m: diff --git a/Provider/metadata_provider.py b/Provider/metadata_provider.py index ce995d4..31bf2e8 100644 --- a/Provider/metadata_provider.py +++ b/Provider/metadata_provider.py @@ -2,6 +2,8 @@ from __future__ import annotations from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Type, cast +import html as html_std +import re import requests import sys import json @@ -279,6 +281,199 @@ class GoogleBooksMetadataProvider(MetadataProvider): return tags +class ISBNsearchMetadataProvider(MetadataProvider): + """Metadata provider that scrapes isbnsearch.org by ISBN. + + This is a best-effort HTML scrape. It expects the query to be an ISBN. 
+ """ + + @property + def name(self) -> str: # type: ignore[override] + return "isbnsearch" + + @staticmethod + def _strip_html_to_text(raw: str) -> str: + s = html_std.unescape(str(raw or "")) + s = re.sub(r"(?i)", "\n", s) + s = re.sub(r"<[^>]+>", " ", s) + s = re.sub(r"\s+", " ", s) + return s.strip() + + @staticmethod + def _clean_isbn(query: str) -> str: + s = str(query or "").strip() + if not s: + return "" + s = s.replace("isbn:", "").replace("ISBN:", "") + s = re.sub(r"[^0-9Xx]", "", s).upper() + if len(s) in (10, 13): + return s + # Try to locate an ISBN-like token inside the query. + m = re.search(r"\b(?:97[89])?\d{9}[\dXx]\b", s) + return str(m.group(0)).upper() if m else "" + + def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: + _ = limit + isbn = self._clean_isbn(query) + if not isbn: + return [] + + url = f"https://isbnsearch.org/isbn/{isbn}" + try: + resp = requests.get(url, timeout=10) + resp.raise_for_status() + html = str(resp.text or "") + if not html: + return [] + except Exception as exc: + log(f"ISBNsearch scrape failed: {exc}", file=sys.stderr) + return [] + + title = "" + m_title = re.search(r"(?is)]*>(.*?)", html) + if m_title: + title = self._strip_html_to_text(m_title.group(1)) + + raw_fields: Dict[str, str] = {} + strong_matches = list(re.finditer(r"(?is)]*>(.*?)", html)) + for idx, m in enumerate(strong_matches): + label_raw = self._strip_html_to_text(m.group(1)) + label = str(label_raw or "").strip() + if not label: + continue + if label.endswith(":"): + label = label[:-1].strip() + + chunk_start = m.end() + # Stop at next or end of document. + chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html) + chunk = html[chunk_start:chunk_end] + # Prefer stopping within the same paragraph when possible. + m_end = re.search(r"(?is)(

|)", chunk) + if m_end: + chunk = chunk[: m_end.start()] + + val_text = self._strip_html_to_text(chunk) + if not val_text: + continue + raw_fields[label] = val_text + + def _get(*labels: str) -> str: + for lab in labels: + for k, v in raw_fields.items(): + if str(k).strip().lower() == str(lab).strip().lower(): + return str(v or "").strip() + return "" + + # Map common ISBNsearch labels. + author_text = _get("Author", "Authors", "Author(s)") + publisher = _get("Publisher") + published = _get("Published", "Publication Date", "Publish Date") + language = _get("Language") + pages = _get("Pages") + isbn_13 = _get("ISBN-13", "ISBN13") + isbn_10 = _get("ISBN-10", "ISBN10") + + year = "" + if published: + m_year = re.search(r"\b(\d{4})\b", published) + year = str(m_year.group(1)) if m_year else "" + + authors: List[str] = [] + if author_text: + # Split on common separators; keep multi-part names intact. + for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*", author_text, flags=re.IGNORECASE): + p = str(part or "").strip() + if p: + authors.append(p) + + # Prefer parsed title, but fall back to og:title if needed. + if not title: + m_og = re.search(r"(?is)]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>", html) + if m_og: + title = self._strip_html_to_text(m_og.group(1)) + + # Ensure ISBN tokens are normalized. + isbn_tokens: List[str] = [] + for token in [isbn_13, isbn_10, isbn]: + t = self._clean_isbn(token) + if t and t not in isbn_tokens: + isbn_tokens.append(t) + + item: Dict[str, Any] = { + "title": title or "", + # Keep UI columns compatible with the generic metadata table. + "artist": ", ".join(authors) if authors else "", + "album": publisher or "", + "year": year or "", + "provider": self.name, + "authors": authors, + "publisher": publisher or "", + "language": language or "", + "pages": pages or "", + "identifiers": { + "isbn_13": next((t for t in isbn_tokens if len(t) == 13), None), + "isbn_10": next((t for t in isbn_tokens if len(t) == 10), None), + }, + "raw_fields": raw_fields, + } + + # Only return usable items. + if not item.get("title") and not any(item["identifiers"].values()): + return [] + + return [item] + + def to_tags(self, item: Dict[str, Any]) -> List[str]: + tags: List[str] = [] + + title = str(item.get("title") or "").strip() + if title: + tags.append(f"title:{title}") + + authors = item.get("authors") or [] + if isinstance(authors, list): + for a in authors: + a = str(a or "").strip() + if a: + tags.append(f"author:{a}") + + publisher = str(item.get("publisher") or "").strip() + if publisher: + tags.append(f"publisher:{publisher}") + + year = str(item.get("year") or "").strip() + if year: + tags.append(f"year:{year}") + + language = str(item.get("language") or "").strip() + if language: + tags.append(f"language:{language}") + + identifiers = item.get("identifiers") or {} + if isinstance(identifiers, dict): + for key in ("isbn_13", "isbn_10"): + val = identifiers.get(key) + if val: + tags.append(f"isbn:{val}") + + tags.append(f"source:{self.name}") + + # Dedup case-insensitively, preserve order. 
+ seen: set[str] = set() + out: List[str] = [] + for t in tags: + s = str(t or "").strip() + if not s: + continue + k = s.lower() + if k in seen: + continue + seen.add(k) + out.append(s) + return out + + class MusicBrainzMetadataProvider(MetadataProvider): """Metadata provider for MusicBrainz recordings.""" @@ -517,6 +712,7 @@ _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = { "openlibrary": OpenLibraryMetadataProvider, "googlebooks": GoogleBooksMetadataProvider, "google": GoogleBooksMetadataProvider, + "isbnsearch": ISBNsearchMetadataProvider, "musicbrainz": MusicBrainzMetadataProvider, "ytdlp": YtdlpMetadataProvider, } diff --git a/Provider/openlibrary.py b/Provider/openlibrary.py index c2202c5..cf27f07 100644 --- a/Provider/openlibrary.py +++ b/Provider/openlibrary.py @@ -190,6 +190,7 @@ def _archive_id_from_url(url: str) -> str: u = str(url or "").strip() if not u: return "" + try: p = urlparse(u) host = (p.hostname or "").lower().strip() @@ -215,6 +216,175 @@ def _archive_id_from_url(url: str) -> str: return "" +def _coerce_archive_field_list(value: Any) -> List[str]: + """Coerce an Archive.org metadata field to a list of strings.""" + if value is None: + return [] + if isinstance(value, list): + out: List[str] = [] + for v in value: + try: + s = str(v).strip() + except Exception: + continue + if s: + out.append(s) + return out + if isinstance(value, (tuple, set)): + out = [] + for v in value: + try: + s = str(v).strip() + except Exception: + continue + if s: + out.append(s) + return out + try: + s = str(value).strip() + except Exception: + return [] + return [s] if s else [] + + +def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]: + """Map Archive.org metadata JSON (the `metadata` object) to tag strings. + + This is intentionally best-effort and conservative: it focuses on stable, + useful bibliographic fields (title/author/publisher/ISBN/identifier/topics). 
+ """ + archive_id_clean = str(archive_id or "").strip() + meta = item_metadata if isinstance(item_metadata, dict) else {} + + tags: List[str] = [] + seen: set[str] = set() + + def _add(tag: str) -> None: + try: + t = str(tag).strip() + except Exception: + return + if not t: + return + if t.lower() in seen: + return + seen.add(t.lower()) + tags.append(t) + + if archive_id_clean: + _add(f"internet_archive:{archive_id_clean}") + + # Title + for title in _coerce_archive_field_list(meta.get("title"))[:1]: + _add(f"title:{title}") + + # Authors/creators + creators: List[str] = [] + creators.extend(_coerce_archive_field_list(meta.get("creator"))) + creators.extend(_coerce_archive_field_list(meta.get("author"))) + for creator in creators[:3]: + _add(f"author:{creator}") + + # Publisher + for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]: + _add(f"publisher:{publisher}") + + # Publish date/year + for date_val in _coerce_archive_field_list(meta.get("date"))[:1]: + _add(f"publish_date:{date_val}") + for year_val in _coerce_archive_field_list(meta.get("year"))[:1]: + _add(f"publish_date:{year_val}") + + # Language + for lang in _coerce_archive_field_list(meta.get("language"))[:3]: + _add(f"language:{lang}") + + # Topics/subjects: follow existing OpenLibrary behavior (un-namespaced tags) + for subj in _coerce_archive_field_list(meta.get("subject"))[:15]: + if len(subj) > 200: + subj = subj[:200] + _add(subj) + + # ISBNs and identifiers + def _clean_isbn(raw: str) -> str: + return str(raw or "").replace("-", "").strip() + + for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]: + isbn_clean = _clean_isbn(isbn) + if isbn_clean: + _add(f"isbn:{isbn_clean}") + + identifiers: List[str] = [] + identifiers.extend(_coerce_archive_field_list(meta.get("identifier"))) + identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier"))) + added_other = 0 + for ident in identifiers: + ident_s = str(ident or "").strip() + if not ident_s: + continue + low = ident_s.lower() + + if low.startswith("urn:isbn:"): + val = _clean_isbn(ident_s.split(":", 2)[-1]) + if val: + _add(f"isbn:{val}") + continue + if low.startswith("isbn:"): + val = _clean_isbn(ident_s.split(":", 1)[-1]) + if val: + _add(f"isbn:{val}") + continue + if low.startswith("urn:oclc:"): + val = ident_s.split(":", 2)[-1].strip() + if val: + _add(f"oclc:{val}") + continue + if low.startswith("oclc:"): + val = ident_s.split(":", 1)[-1].strip() + if val: + _add(f"oclc:{val}") + continue + if low.startswith("urn:lccn:"): + val = ident_s.split(":", 2)[-1].strip() + if val: + _add(f"lccn:{val}") + continue + if low.startswith("lccn:"): + val = ident_s.split(":", 1)[-1].strip() + if val: + _add(f"lccn:{val}") + continue + if low.startswith("doi:"): + val = ident_s.split(":", 1)[-1].strip() + if val: + _add(f"doi:{val}") + continue + + if archive_id_clean and low == archive_id_clean.lower(): + continue + if added_other >= 5: + continue + if len(ident_s) > 200: + ident_s = ident_s[:200] + _add(f"identifier:{ident_s}") + added_other += 1 + + return tags + + +def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]: + ident = str(archive_id or "").strip() + if not ident: + return {} + resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout)) + resp.raise_for_status() + data = resp.json() if resp is not None else {} + if not isinstance(data, dict): + return {} + meta = data.get("metadata") + return meta if isinstance(meta, dict) else {} + + class OpenLibrary(Provider): # 
Domains that should be routed to this provider when the user supplies a URL. # (Used by ProviderCore.registry.match_provider_name_for_url) @@ -1046,6 +1216,25 @@ class OpenLibrary(Provider): log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr) return None + # Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets. + try: + archive_meta = _fetch_archive_item_metadata(archive_id) + tags = _archive_item_metadata_to_tags(archive_id, archive_meta) + if tags: + try: + result.tag.update(tags) + except Exception: + # Fallback for callers that pass plain dicts. + pass + if isinstance(meta, dict): + meta["archive_id"] = archive_id + if archive_meta: + meta["archive_metadata"] = archive_meta + result.full_metadata = meta + except Exception: + # Never block downloads on metadata fetch. + pass + safe_title = sanitize_filename(result.title) if not safe_title or "http" in safe_title.lower(): safe_title = sanitize_filename(archive_id) or "archive" diff --git a/Provider/soulseek.py b/Provider/soulseek.py index 65abfb8..9e00696 100644 --- a/Provider/soulseek.py +++ b/Provider/soulseek.py @@ -306,36 +306,41 @@ class Soulseek(Provider): client = SoulSeekClient(settings) with _suppress_aioslsk_noise(): - try: - await client.start() - await client.login() - except Exception as exc: - log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr) - return [] + async with _suppress_aioslsk_asyncio_task_noise(): + try: + await client.start() + await client.login() + except Exception as exc: + log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] - try: - search_request = await client.searches.search(query) - await self._collect_results(search_request, timeout=timeout) - return self._flatten_results(search_request)[:limit] - except Exception as exc: - log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr) - return [] - finally: - # Best-effort: try to cancel/close the search request before stopping - # the client to reduce stray reply spam. try: - if "search_request" in locals() and search_request is not None: - cancel = getattr(search_request, "cancel", None) - if callable(cancel): - maybe = cancel() - if asyncio.iscoroutine(maybe): - await maybe - except Exception: - pass - try: - await client.stop() - except Exception: - pass + search_request = await client.searches.search(query) + await self._collect_results(search_request, timeout=timeout) + return self._flatten_results(search_request)[:limit] + except Exception as exc: + log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] + finally: + # Best-effort: try to cancel/close the search request before stopping + # the client to reduce stray reply spam. 
+ try: + if "search_request" in locals() and search_request is not None: + cancel = getattr(search_request, "cancel", None) + if callable(cancel): + maybe = cancel() + if asyncio.iscoroutine(maybe): + await maybe + except Exception: + pass + try: + await client.stop() + except Exception: + pass + try: + await asyncio.sleep(0) + except Exception: + pass def _flatten_results(self, search_request: Any) -> List[dict]: flat: List[dict] = [] @@ -558,93 +563,101 @@ async def download_soulseek_file( async def _attempt_once(attempt_num: int) -> tuple[Optional[Path], Any, int, float]: client = SoulSeekClient(settings) with _suppress_aioslsk_noise(): - try: - async with _suppress_aioslsk_asyncio_task_noise(): - await client.start() - await client.login() - debug(f"[soulseek] Logged in as {login_user}") - - log( - f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}", - file=sys.stderr, - ) - debug(f"[soulseek] Requesting download from {username}: {filename}") - - transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD)) - transfer.local_path = str(output_path) - await client.transfers.queue(transfer) - - start_time = time.time() - last_progress_time = start_time - progress_bar = ProgressBar() - - while not transfer.is_finalized(): - elapsed = time.time() - start_time - if elapsed > timeout: - log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) - bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) - state_val = getattr(getattr(transfer, "state", None), "VALUE", None) - progress_bar.finish() - return None, state_val, bytes_done, elapsed - - bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) - total_bytes = int(getattr(transfer, "filesize", 0) or 0) - now = time.time() - if now - last_progress_time >= 0.5: - progress_bar.update( - downloaded=bytes_done, - total=total_bytes if total_bytes > 0 else None, - label="download", - file=sys.stderr, - ) - last_progress_time = now - - await asyncio.sleep(1) - - final_state = getattr(getattr(transfer, "state", None), "VALUE", None) - downloaded_path = Path(transfer.local_path) if getattr(transfer, "local_path", None) else output_path - final_elapsed = time.time() - start_time - - # Clear in-place progress bar. - progress_bar.finish() - - # If a file was written, treat it as success even if state is odd. + async with _suppress_aioslsk_asyncio_task_noise(): try: - if downloaded_path.exists() and downloaded_path.stat().st_size > 0: - if final_state != TransferState.COMPLETE: - log( - f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). 
Keeping file.", + await client.start() + await client.login() + debug(f"[soulseek] Logged in as {login_user}") + + log( + f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}", + file=sys.stderr, + ) + debug(f"[soulseek] Requesting download from {username}: {filename}") + + transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD)) + transfer.local_path = str(output_path) + await client.transfers.queue(transfer) + + start_time = time.time() + last_progress_time = start_time + progress_bar = ProgressBar() + + while not transfer.is_finalized(): + elapsed = time.time() - start_time + if elapsed > timeout: + log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) + bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) + state_val = getattr(getattr(transfer, "state", None), "VALUE", None) + progress_bar.finish() + return None, state_val, bytes_done, elapsed + + bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) + total_bytes = int(getattr(transfer, "filesize", 0) or 0) + now = time.time() + if now - last_progress_time >= 0.5: + progress_bar.update( + downloaded=bytes_done, + total=total_bytes if total_bytes > 0 else None, + label="download", file=sys.stderr, ) + last_progress_time = now + + await asyncio.sleep(1) + + final_state = getattr(getattr(transfer, "state", None), "VALUE", None) + downloaded_path = ( + Path(transfer.local_path) + if getattr(transfer, "local_path", None) + else output_path + ) + final_elapsed = time.time() - start_time + + # Clear in-place progress bar. + progress_bar.finish() + + # If a file was written, treat it as success even if state is odd. + try: + if downloaded_path.exists() and downloaded_path.stat().st_size > 0: + if final_state != TransferState.COMPLETE: + log( + f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.", + file=sys.stderr, + ) + return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed + except Exception: + pass + + if final_state == TransferState.COMPLETE and downloaded_path.exists(): + debug(f"[soulseek] Download complete: {downloaded_path}") return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed - except Exception: - pass - if final_state == TransferState.COMPLETE and downloaded_path.exists(): - debug(f"[soulseek] Download complete: {downloaded_path}") - return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed + fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0) + fail_total = int(getattr(transfer, "filesize", 0) or 0) + reason = getattr(transfer, "reason", None) + log( + f"[soulseek] Download failed: state={final_state} bytes={fail_bytes}/{fail_total} reason={reason}", + file=sys.stderr, + ) - fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0) - fail_total = int(getattr(transfer, "filesize", 0) or 0) - reason = getattr(transfer, "reason", None) - log( - f"[soulseek] Download failed: state={final_state} bytes={fail_bytes}/{fail_total} reason={reason}", - file=sys.stderr, - ) - - # Clean up 0-byte placeholder. - try: - if downloaded_path.exists() and downloaded_path.stat().st_size == 0: - downloaded_path.unlink(missing_ok=True) - except Exception: - pass - return None, final_state, fail_bytes, final_elapsed - - finally: - try: - await client.stop() - except Exception: - pass + # Clean up 0-byte placeholder. 
+ try: + if downloaded_path.exists() and downloaded_path.stat().st_size == 0: + downloaded_path.unlink(missing_ok=True) + except Exception: + pass + return None, final_state, fail_bytes, final_elapsed + finally: + try: + await client.stop() + except Exception: + pass + # Let cancellation/cleanup callbacks run while our exception handler is still installed. + try: + await asyncio.sleep(0) + except Exception: + pass # Retry a couple times only for fast 0-byte failures (common transient case). max_attempts = 3 diff --git a/SYS/download.py b/SYS/download.py index abbf0c1..f567d64 100644 --- a/SYS/download.py +++ b/SYS/download.py @@ -606,6 +606,34 @@ def _download_direct_file( except Exception as e: if not quiet: log(f"Could not get filename from headers: {e}", file=sys.stderr) + + # Guardrail: never treat HTML landing pages as downloadable files. + # We explicitly probe with GET for page-like endpoints (e.g. *.php) since some + # servers block/lie on HEAD, and a URL path like `edition.php` would otherwise + # be saved as a bogus file. + try: + page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"} + ext = "" + try: + ext = Path(str(filename or "")).suffix.lower() + except Exception: + ext = "" + + ct0 = (content_type or "").split(";", 1)[0].strip().lower() + must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts) + + if must_probe: + with HTTPClient(timeout=10.0) as client: + with client._request_stream("GET", url, follow_redirects=True) as resp: + resp.raise_for_status() + ct = str(resp.headers.get("content-type", "") or "").split(";", 1)[0].strip().lower() + if ct.startswith("text/html"): + raise DownloadError("URL appears to be an HTML page, not a direct file") + except DownloadError: + raise + except Exception: + # If we can't probe, keep going; later logic may still infer a safe extension. + pass # Apply suggested filename (from provider title) if given. suggested = _sanitize_filename(suggested_filename) if suggested_filename else "" diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py index 90fbf23..ee6c78b 100644 --- a/cmdlet/download_file.py +++ b/cmdlet/download_file.py @@ -319,6 +319,18 @@ class Download_File(Cmdlet): except Exception: provider_name = None + # Heuristic: LibGen often uses landing pages like edition.php/file.php. + # These should never be treated as direct file URLs. + if not provider_name: + try: + p = urlparse(str(url)) + h = (p.hostname or "").strip().lower() + path = (p.path or "").strip().lower() + if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")): + provider_name = "libgen" + except Exception: + pass + if provider_name and get_provider is not None and SearchResult is not None: # OpenLibrary URLs should be handled by the OpenLibrary provider. if provider_name == "openlibrary": @@ -391,11 +403,19 @@ class Download_File(Cmdlet): progress.clear_status() if downloaded_path: + tags_hint: Optional[List[str]] = None + try: + sr_tags = getattr(sr, "tag", None) + if isinstance(sr_tags, set) and sr_tags: + tags_hint = sorted([str(t) for t in sr_tags if t]) + except Exception: + tags_hint = None + self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=title_hint, - tags_hint=None, + tags_hint=tags_hint, media_kind_hint="book", full_metadata=sr.full_metadata, provider_hint="openlibrary", @@ -472,14 +492,42 @@ class Download_File(Cmdlet): except Exception: downloaded_path = None + # Refuse to fall back to direct-download for LibGen landing pages. 
+ # This prevents saving HTML (e.g. edition.php) as a bogus file. + if (not downloaded_path) and str(provider_name).lower() == "libgen": + raise DownloadError("LibGen URL did not resolve to a downloadable file") + if downloaded_path: + tags_hint: Optional[List[str]] = None + full_md: Optional[Dict[str, Any]] = None + title_hint = Path(str(downloaded_path)).stem + media_kind_hint = "file" + + if str(provider_name).lower() == "libgen": + media_kind_hint = "book" + try: + sr_tags = getattr(sr, "tag", None) + if isinstance(sr_tags, set) and sr_tags: + tags_hint = sorted([str(t) for t in sr_tags if t]) + except Exception: + tags_hint = None + + try: + if isinstance(getattr(sr, "full_metadata", None), dict): + full_md = sr.full_metadata + t = str(full_md.get("title") or "").strip() + if t: + title_hint = t + except Exception: + full_md = None + self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), - title_hint=Path(str(downloaded_path)).stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, + title_hint=title_hint, + tags_hint=tags_hint, + media_kind_hint=media_kind_hint, + full_metadata=full_md, provider_hint=str(provider_name), progress=progress, config=config, @@ -600,6 +648,7 @@ class Download_File(Cmdlet): # If this looks like a provider item and providers are available, prefer provider.download() downloaded_path: Optional[Path] = None attempted_provider_download = False + provider_sr = None if table and get_search_provider and SearchResult: provider = get_search_provider(str(table), config) if provider is not None: @@ -612,6 +661,7 @@ class Download_File(Cmdlet): ) debug(f"[download-file] Downloading provider item via {table}: {sr.title}") downloaded_path = provider.download(sr, final_output_dir) + provider_sr = sr # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML. if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary": @@ -693,6 +743,30 @@ class Download_File(Cmdlet): log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr) continue + # Allow providers to add/enrich tags and metadata during download. + if str(table or "").lower() == "libgen" and provider_sr is not None: + try: + sr_tags = getattr(provider_sr, "tag", None) + if tags_list is None and isinstance(sr_tags, set) and sr_tags: + tags_list = sorted([str(t) for t in sr_tags if t]) + except Exception: + pass + + try: + sr_md = getattr(provider_sr, "full_metadata", None) + if isinstance(sr_md, dict) and sr_md: + full_metadata = sr_md + except Exception: + pass + + try: + if isinstance(full_metadata, dict): + t = str(full_metadata.get("title") or "").strip() + if t: + title = t + except Exception: + pass + self._emit_local_file( downloaded_path=downloaded_path, source=str(target) if target else None, diff --git a/cmdlet/download_media.py b/cmdlet/download_media.py index 0ab3cc2..c460d0e 100644 --- a/cmdlet/download_media.py +++ b/cmdlet/download_media.py @@ -1593,6 +1593,36 @@ class Download_Media(Cmdlet): debug("Preflight URL check: no matches") return True + # If the user already answered the duplicate URL prompt for this pipeline/command, + # respect that decision and don't re-prompt for every item. 
+ try: + current_cmd_text = pipeline_context.get_current_command_text("") + except Exception: + current_cmd_text = "" + + try: + stage_ctx = pipeline_context.get_stage_context() + except Exception: + stage_ctx = None + + in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) + if in_pipeline: + try: + cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") + cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) + except Exception: + cached_cmd = "" + cached_decision = None + + if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): + if bool(cached_decision): + return True + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + table = ResultTable(f"URL already exists ({len(url_matches)} match(es))") results_list: List[Dict[str, Any]] = [] for item in url_matches: @@ -1639,8 +1669,28 @@ class Download_Media(Cmdlet): with cm: get_stderr_console().print(table) setattr(table, "_rendered_by_cmdlet", True) - if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()): - if used_suspend: + answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) + + # Cache decision for the duration of this pipeline/command. + if in_pipeline: + try: + existing = pipeline_context.load_value("preflight", default=None) + except Exception: + existing = None + preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} + url_dup_cache = preflight_cache.get("url_duplicates") + if not isinstance(url_dup_cache, dict): + url_dup_cache = {} + url_dup_cache["command"] = str(current_cmd_text or "") + url_dup_cache["continue"] = bool(answered_yes) + preflight_cache["url_duplicates"] = url_dup_cache + try: + pipeline_context.store_value("preflight", preflight_cache) + except Exception: + pass + + if not answered_yes: + if in_pipeline and used_suspend: try: pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) except Exception: @@ -1666,6 +1716,35 @@ class Download_Media(Cmdlet): debug("Bulk URL preflight skipped: storage unavailable") return True + # Honor any prior duplicate URL decision for this pipeline/command. 
+ try: + current_cmd_text = pipeline_context.get_current_command_text("") + except Exception: + current_cmd_text = "" + + try: + stage_ctx = pipeline_context.get_stage_context() + except Exception: + stage_ctx = None + + in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) + if in_pipeline: + try: + cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") + cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) + except Exception: + cached_cmd = "" + cached_decision = None + + if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): + if bool(cached_decision): + return True + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + unique_urls: List[str] = [] for u in urls or []: s = str(u or "").strip() @@ -1930,7 +2009,32 @@ class Download_Media(Cmdlet): cm = nullcontext() with cm: - if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()): + answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) + + # Cache decision for the duration of this pipeline/command. + if in_pipeline: + try: + existing = pipeline_context.load_value("preflight", default=None) + except Exception: + existing = None + preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} + url_dup_cache = preflight_cache.get("url_duplicates") + if not isinstance(url_dup_cache, dict): + url_dup_cache = {} + url_dup_cache["command"] = str(current_cmd_text or "") + url_dup_cache["continue"] = bool(answered_yes) + preflight_cache["url_duplicates"] = url_dup_cache + try: + pipeline_context.store_value("preflight", preflight_cache) + except Exception: + pass + + if not answered_yes: + if in_pipeline: + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass return False return True @@ -1947,6 +2051,20 @@ class Download_Media(Cmdlet): if not isinstance(entries, list) or len(entries) <= 1: return False + # Identify a stable table type so `@* | ...` pipelines can auto-insert the + # appropriate downloader stage (e.g., Bandcamp selections should insert + # `download-media` before `merge-file`). + extractor_name = "" + try: + extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower() + except Exception: + extractor_name = "" + table_type: Optional[str] = None + if "bandcamp" in extractor_name: + table_type = "bandcamp" + elif "youtube" in extractor_name: + table_type = "youtube" + # Display table (limit rows to keep output reasonable) max_rows = 200 display_entries = entries[:max_rows] @@ -1976,6 +2094,11 @@ class Download_Media(Cmdlet): table = ResultTable() safe_url = str(url or "").strip() table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media" + if table_type: + try: + table.set_table(table_type) + except Exception: + table.table = table_type table.set_source_command("download-media", []) try: table.set_preserve_order(True) @@ -2581,7 +2704,10 @@ class Download_Media(Cmdlet): PipelineProgress(pipeline_context).step("finalized") stage_ctx = pipeline_context.get_stage_context() - emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)) + # Emit per downloaded item whenever we're running under the pipeline runner. 
+        # Live progress advances on emit(), and suppressing emits for the last stage
+        # causes the pipe bar to stay at 0% even while downloads complete.
+        emit_enabled = bool(stage_ctx is not None)
         for pipe_obj_dict in pipe_objects:
             if emit_enabled:
                 pipeline_context.emit(pipe_obj_dict)
diff --git a/cmdlet/get_url.py b/cmdlet/get_url.py
index 3558efe..669d3f4 100644
--- a/cmdlet/get_url.py
+++ b/cmdlet/get_url.py
@@ -85,7 +85,12 @@ class Get_Url(Cmdlet):
         if title:
             table_title = f"Title: {title}"
 
-        table = ResultTable(table_title, max_columns=1).set_preserve_order(True)
+        table = (
+            ResultTable(table_title, max_columns=1)
+            .set_preserve_order(True)
+            .set_table("url")
+            .set_value_case("preserve")
+        )
         table.set_source_command("get-url", [])
 
         items: List[UrlItem] = []
diff --git a/docs/img/bandcamp-album.svg b/docs/img/bandcamp-album.svg
new file mode 100644
index 0000000..e7231a9
--- /dev/null
+++ b/docs/img/bandcamp-album.svg
@@ -0,0 +1,283 @@
+[SVG asset: Rich render of the ResultTable "Bandcamp: artist:Altrusian Grace Media" — 50 album rows with #, TITLE, TYPE, URL columns; SVG markup omitted]
diff --git a/docs/img/bandcamp-artist.svg b/docs/img/bandcamp-artist.svg
new file mode 100644
index 0000000..9384384
--- /dev/null
+++ b/docs/img/bandcamp-artist.svg
@@ -0,0 +1,83 @@
+[SVG asset: Rich render of the ResultTable "Bandcamp: artist:altrusian grace media" — 1 artist row with #, TITLE, LOCATION, TYPE, URL columns; SVG markup omitted]
diff --git a/docs/img/download-media-bandcamp.svg b/docs/img/download-media-bandcamp.svg
new file mode 100644
index 0000000..e63dace
--- /dev/null
+++ b/docs/img/download-media-bandcamp.svg
@@ -0,0 +1,151 @@
+[SVG asset: Rich render of the ResultTable titled 'download-media -url "https://altrusiangrace.bandcamp.com/album/zetetic-astronomy-earth-not-a-globe-full-audiobook"' — 18 track rows with #, TITLE, DURATION, UPLOADER columns; SVG markup omitted]
diff --git a/docs/img/get-url.svg b/docs/img/get-url.svg
new file mode 100644
index 0000000..1c7e880
--- /dev/null
+++ b/docs/img/get-url.svg
@@ -0,0 +1,87 @@
+[SVG asset: Rich render of the ResultTable "url" — 2 rows (https://www.timecube.net/ and https://0x0.st/pzgy.webp, both in store "tutorial") with #, URL, HASH, STORE columns; SVG markup omitted]
diff --git a/docs/img/screen-shot.svg b/docs/img/screen-shot.svg
new file mode 100644
index 0000000..82b13c0
--- /dev/null
+++ b/docs/img/screen-shot.svg
@@ -0,0 +1,83 @@
+[SVG asset: Rich render of the ResultTable "add-file -store tutorial" — 1 row ("timecube homepage", tutorial, 2.69 mb, webp) with #, TITLE, STORE, SIZE, EXT columns; SVG markup omitted]
diff --git a/docs/img/soulseek.svg b/docs/img/soulseek.svg
new file mode 100644
index 0000000..7b62aef
--- /dev/null
+++ b/docs/img/soulseek.svg
@@ -0,0 +1,251 @@
+[SVG asset: Rich render of the ResultTable "Soulseek: erika herms niel" — 43 result rows with #, TRACK, TITLE, ARTIST, ALBUM, SIZE columns; SVG markup omitted]
diff --git a/docs/tutorial.md b/docs/tutorial.md
index 3824b36..58dc210 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -79,6 +79,136 @@ or if you have mpv installed (the preferred way for video files)
 
+# Bandcamp downloading (provider method)
<🜂🜄|🜁🜃>
-search-provider -provider bandcamp "artist:altrusian grace media"
+search-provider -provider bandcamp -query "artist:altrusian grace media"
+
+
+
+![search-provider -provider bandcamp](img/bandcamp-artist.svg)
+
+This brings up a dedicated scraper for the Bandcamp artist page. The `artist:` prefix is part of the query syntax — it is how arguments are parsed for some cmdlets. Next, run the following:
+
+
<🜂🜄|🜁🜃>
+
@1
+
+
+
+![bandcamp-artist](img/bandcamp-album.svg)
+
+This shows a list of Bandcamp album links. Go ahead and enter this below:
+
+
<🜂🜄|🜁🜃>
+
@8
+
+
+
+![download-media-bandcamp](img/download-media-bandcamp.svg)
+
+This lists the tracks of one Bandcamp album. Notice the title of the table, `download-media -url "https://altrusiangrace.bandcamp.com/album/zetetic-astronomy-earth-not-a-globe-full-audiobook"` — you could copy that URL from the web page and paste it as this command to get the same table without going through the provider search. Every entry here is a playlist item: each item is its own file, but in this case they are connected and belong to one album. If you only wanted a couple of them, `@1,5,7` would download just those items; a range like `@1-6` downloads items 1 through 6.
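+
+For example, a narrower selection would look like this (illustration only — no need to run it now):
+
+<🜂🜄|🜁🜃>
+@1-6
+
+For this tutorial, run the full pipeline below:
+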
+
<🜂🜄|🜁🜃>
+
@* | merge-file | add-file -store tutorial
+
+
+
+This downloads the entire playlist and merges the tracks into one file, inserting chapter markers at the merge points.
+
+# screenshot
+
+medios can take screenshots of web pages and output them as pdf, jpg, png, or webp. The default is webp, as it is the smallest format. Screenshots are full-page: the page is scrolled from top to bottom.
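+
+If you want one of the other formats, the output type can be switched per call. A sketch only — the `-format` flag name here is an assumption, not confirmed by this tutorial; check `screen-shot -help` for the real option:
+
+<🜂🜄|🜁🜃>
+screen-shot "https://www.timecube.net/" -format pdf
+
+For this tutorial, run this below:
+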
+
<🜂🜄|🜁🜃>
+
screen-shot "https://www.timecube.net/" | add-tag "title:timecube homepage" | add-file -store tutorial
+
+
+
+![screen-shot](img/screen-shot.svg)
+
+
<🜂🜄|🜁🜃>
+
@1 | get-file
+
+
+
+This opens a web browser with your media file embedded, so you can copy and paste it. If you want to share it on a public host instead, you can do this:
+
+
<🜂🜄|🜁🜃>
+
@1 | add-file -provider 0x0
+
+
+
+This uploads your media file to 0x0.st and prints a link at the end. You don't need to worry about saving the link — you can always retrieve the URLs recorded for the file by running:
+
+
<🜂🜄|🜁🜃>
+
@1 | get-url
+
+
+
+![get-url](img/get-url.svg)
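+
+These pieces compose. For example, you could capture a page and publish it in a single pipeline — an illustration that reuses the cmdlets shown above, no need to run it now:
+
+<🜂🜄|🜁🜃>
+screen-shot "https://www.timecube.net/" | add-file -provider 0x0
+
+# soul seek
+## soul-seek requires a pseudo account
+To make an account for Soulseek, simply enter a random username and password and put them in your config.conf. You do not need to create an account on the website — just pick a random name that isn't taken, plus a random password. Don't use special symbols or spaces.
+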
+
config.conf
+
[provider=soulseek]
+username="putinrandomusername"
+password="putinrandompassword"
+
+
+
+Restart the CLI and check the startup table: if soulseek says ENABLED, you are good to go. Run this:
+
+
<🜂🜄|🜁🜃>
+
search-provider -provider soulseek "erika herms niel"
+
+
+
+![soulseek](img/soulseek.svg)
+
+Next, run:
+
+
<🜂🜄|🜁🜃>
+
@13 | add-file -store tutorial
+
+
+
+This downloads the file and injects it into your tutorial store. Keep in mind that Soulseek is P2P, so download speeds will vary and it may take a minute to connect.
+
+# Open Library
+## OPENLIBRARY/ARCHIVE.ORG ACCOUNT REQUIRED
+[Open Library](https://archive.org/account/signup)
+A throwaway account is fine to create.
+
+
config.conf
+
[provider=OpenLibrary]
+email=""
+password=""
+
+
+
+OpenLibrary allows us to borrow books, merge the borrowed pages into a permanent PDF, then return the book. Run this below:
+
+
<🜂🜄|🜁🜃>
+
download-file "https://archive.org/details/powerofsatiremag0000elli_h4h9" | add-file -store tutorial
+
+
+
+We could have used `search-provider -provider openlibrary`, but to show the versatility of the app we use `download-file` instead: medios intelligently routes the URL to the correct provider. (The exception is `download-media`, which is just the frontend for yt-dlp.)
+
+# Libgen
+LibGen is largely self-explanatory.
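+
+You can search it like the other providers. A sketch — `libgen` is the provider name the codebase uses for these URLs, though the exact query string here is only an example:
+
+<🜂🜄|🜁🜃>
+search-provider -provider libgen "dion fortune"
+
+Or hand a LibGen landing-page URL (ads.php, edition.php, get.php, ...) straight to download-file; it is recognized by host and path and routed to the LibGen provider rather than being saved as an HTML page:
+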
+
<🜂🜄|🜁🜃>
+
download-file "https://libgen.gl/ads.php?md5=5c258cc177c1d735c7acfb60fbdb14bf&downloadname=10.1515/9781400870080-009" | add-file -store tutorial
 
diff --git a/metadata.py b/metadata.py index 9f5aa1e..6e87dd7 100644 --- a/metadata.py +++ b/metadata.py @@ -395,32 +395,10 @@ def normalize_urls(value: Any) -> List[str]: if not u: return None - lower = u.lower() - if not (lower.startswith("http://") or lower.startswith("https://")): - return u - - try: - parsed = urlsplit(u) - except Exception: - return u - - scheme = (parsed.scheme or "").lower() - netloc = (parsed.netloc or "").lower() - path = unquote(parsed.path or "") - query = parsed.query or "" - - # Normalize default ports. - if scheme == "http" and netloc.endswith(":80"): - netloc = netloc[:-3] - elif scheme == "https" and netloc.endswith(":443"): - netloc = netloc[:-4] - - # Prefer no trailing slash except root. - if path and path != "/": - path = path.rstrip("/") - - # Fragments are not part of the resource. - return urlunsplit((scheme, netloc, path, query, "")) + # IMPORTANT: URLs can be case-sensitive in the path/query on some hosts + # (e.g., https://0x0.st/PzGY.webp). Do not lowercase or otherwise rewrite + # the URL here; preserve exact casing and percent-encoding. + return u seen: Set[str] = set() out: List[str] = [] diff --git a/pipeline.py b/pipeline.py index c5059b1..84bd613 100644 --- a/pipeline.py +++ b/pipeline.py @@ -860,6 +860,20 @@ def get_last_result_items() -> List[Any]: return [] +def get_last_selectable_result_items() -> List[Any]: + """Get items from the last *selectable* result table, ignoring display-only items. + + This is useful when a selection stage should target the last visible selectable table + (e.g., a playlist/search table), even if a prior action command emitted items and + populated _DISPLAY_ITEMS. + """ + if _LAST_RESULT_TABLE is None: + return list(_LAST_RESULT_ITEMS) + if _is_selectable_table(_LAST_RESULT_TABLE): + return list(_LAST_RESULT_ITEMS) + return [] + + def get_last_result_table_source_command() -> Optional[str]: """Get the source command from the last displayed result table. diff --git a/readme.md b/readme.md index 538e5a6..f92677c 100644 --- a/readme.md +++ b/readme.md @@ -9,66 +9,37 @@ Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging - **Module Mixing:** *[Playwright](https://github.com/microsoft/playwright), [yt-dlp](https://github.com/yt-dlp/yt-dlp), [aioslsk](https://github.com/JurgenR/aioslsk), [telethon](https://github.com/LonamiWebs/Telethon),[typer](https://github.com/fastapi/typer)* - **MPV Manager:** Play audio, video, and even images in a custom designed MPV with trimming, screenshotting, and more built right in! -## Quick start ⚡ +## installation ⚡ - `docs/BOOTSTRAP.md` and use `scripts/bootstrap.ps1` (Windows) or `scripts/bootstrap.sh` (Linux/macOS) to create a venv and install the project. Alternatively, simply run the opinionated helper: `python ./scripts/setup.py`. By default (no flags), `setup.py` will auto-detect your platform and run the matching bootstrap script in **non-interactive (quiet)** mode so you don't need to run the platform-specific script yourself. The bootstrap scripts also attempt (best-effort) to install `mpv` if it's missing from PATH. Note: the Deno installer can require interactive input on some systems; if the automated Deno install fails, the script will warn and you can install Deno manually by following `docs/BOOTSTRAP.md`. +1. run python setup.py -1. 
Install Python requirements:
-
-```powershell
-python -m pip install -r requirements.txt
-
-# Automated setup (recommended): run the single Python setup script which installs
-# all Python dependencies (from requirements.txt) and downloads Playwright browsers.
-# Usage:
-# - Default: python ./scripts/setup.py  # installs Chromium only (saves disk)
-# - To install all Playwright engines: python ./scripts/setup.py --browsers all
-
-# Advanced options:
-# - Skip dependency installation: python ./scripts/setup.py --skip-deps
-# - Install only Playwright browsers: python ./scripts/setup.py --playwright-only
-# - Install only specific browsers (saves disk): python ./scripts/setup.py --browsers chromium
-# - Example: install only Chromium browsers: python ./scripts/setup.py --playwright-only --browsers chromium
-```
-2. Copy or edit `config.conf` and set a required `temp` directory where intermediate files are written. Example:
+2. Rename `config.conf.remove` to `config.conf`. The `store=folder` path should point to an empty folder that contains no other files.
 
 ```ini
 temp="C:\\Users\\Admin\\Downloads"
 
 [store=folder]
 name="default"
-path="C:\\Media Machina"
+path="C:\\Users\\Public\\Documents\\library"
 
-[store=hydrusnetwork]
-NAME="home"
-API="..."
-URL="http://localhost:45869"
-
-[provider=OpenLibrary]
-email="user@example.com"
-password="..."
 ```
-3. Start the CLI:
-
-```powershell
-cd "C:\location\to\repository\medios-machina\"
-python cli.py
-```
+3. Start the CLI by running `mm` in your shell, or run `python cli.py`.
 
+# [CLICK FOR GUIDED TUTORIAL](/docs/tutorial.md)
 
 ## Usage overview 🔧
 - Pipelines: chain cmdlets with `|`, e.g., `download-media | add-file -storage local`.
 - From your shell you can pass a fully-quoted pipeline so the shell doesn't interpret `|` as a pipe: e.g. `mm "download-media | add-file -storage local"`
 - Format selection (non-interactive): When `download-media` shows multiple formats, you can select one non-interactively by re-running the pipeline and specifying the format:
-  - Use a format id: `mm "download-media '' -format 243 | add-file -store local"`
-  - Or use the listed index (1-based): `mm "download-media '' -query 'format:7' | add-file -store local"`
+  - Use a format id: `mm "download-media '' -query 'format:243' -path 'C:/path'"`
+  - Or use the listed index (1-based): `mm "download-media '' -query 'format:7'"`
   Note: The `@N` selection syntax works in the interactive REPL, but shells like PowerShell treat `@` specially — prefer `-query 'format:N'` when running a quoted pipeline from your shell.
 - Selections: search cmdlets populate a selectable ResultTable; refer to entries with `@`.
 - Tagging & metadata: `add-tag` mutates piped results (temporary path items) or writes to a configured store when `-store` is provided.
-## Built-in image viewer 🎞️
-- MPV automatically detects still-image files and flips into an image viewer mode while leaving the IPC helper aware via `user-data/mpv/image`.
+## Built-in image viewer
+- MPV automatically detects still-image files and flips into an image viewer
 - Arrow keys, `WASD`, or `h/j/k/l` pan the image (recently tuned to ±0.05 steps), `Shift+arrow` offers finer nudges, `=`/`-` zoom quickly (~45% per press), `+`/`_` zoom slowly, and `0` resets zoom/pan back to default.
 - Hit `f` while an image is active to take a screenshot (uses MPV's screenshot pipeline) and get an OSD confirmation.
 - When MPV loads a video again, the script restores the regular video shortcuts automatically.
@@ -77,7 +48,7 @@ python cli.py Simple download with metadata (tags and URL registration): ```bash -download-media "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | add-file -storage local | add-url +download-media "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | add-file -storage local ``` Download a playlist item: @@ -87,19 +58,17 @@ download-media "https://www.youtube.com/playlist?list=PLxxxxx" -item 2 | add-fil Take a website screenshot, tag it, and store locally: ```bash -screen-shot "https://example.com/page" | add-tag "title:Example Page,source:web" | add-file -store local +screen-shot "https://example.com/page" | add-tag "title:Example Page,source:web" -path "c://" ``` OpenLibrary ingestion (book metadata & PDF/ebook handling is automatically enriched when `add-file` detects an OpenLibrary URL): ```bash -add-file "https://openlibrary.org/books/OLxxxxxM/Book_Title" -storage local +download-file "https://openlibrary.org/books/OLxxxxxM/Book_Title" | add-file -storage local ``` -Search & download flow (select with `@`): +Search your library: ```bash -search-file -provider youtube "my favourite track" -@1 -download-media [URL] | add-file -store hydrus +search-store "ext:mp3" ``` ## Providers & stores @@ -110,19 +79,8 @@ download-media [URL] | add-file -store hydrus - **Soulseek, LibGen, All-Debrid, Others**: provider support is modular—add or configure providers in `config.conf`. ## Troubleshooting & tips 🛠️ -- If a cmdlet complains about an unknown store, ensure the piped item has a valid local `path` or use `-store ` to target a configured backend. -- For Playwright screenshots, run `python ./scripts/setup.py` (installs Chromium by default to save download space). To install all engines, run `python ./scripts/setup.py --browsers all`. -- Note: the `screen-shot` cmdlet forces the Playwright **Chromium** engine and will not use Firefox or WebKit. -- To run tests locally after removing `tests/conftest.py`, install the project in editable mode first so tests can import the package: `python -m pip install -e .` or run `python ./scripts/setup.py --install-editable`. -- Deno: The bootstrap scripts will install Deno automatically if it's not already installed (using the official installers). If the installer completes but `deno` is not available in your shell, restart your shell or add `$HOME/.deno/bin` (Windows: `%USERPROFILE%\\.deno\\bin`) to your PATH. - After installation, restart your shell (or add Deno's bin directory to your PATH) so `deno` is available on the command line. -- Use `--debug` to enable verbose logs when tracking down an error. ## Contributing & docs - Developer docs are generated under `docs/` and tests live alongside the code; please run the test suite before submitting changes. -- Contributions welcome—open issues or pull requests with clear descriptions and small, focused diffs. - ---- - -If you'd like, I can add a short _Quick Reference_ section listing the most-used cmdlets and flags, or add badges and a table of contents. What would you like me to add next? 🚀 \ No newline at end of file +- Contributions welcome—open issues or pull requests with clear descriptions and small, focused diffs. \ No newline at end of file