
nose
2025-12-25 04:49:22 -08:00
parent 2542a68479
commit 43afa4e3fa
19 changed files with 2766 additions and 234 deletions


@@ -1,5 +1,6 @@
from __future__ import annotations
import html as html_std
import logging
import re
import requests
@@ -22,6 +23,595 @@ except ImportError:
lxml_html = None
def _strip_html_to_text(raw: str) -> str:
s = html_std.unescape(str(raw or ""))
s = re.sub(r"(?i)<br\s*/?>", "\n", s)
# Help keep lists readable when they are link-heavy.
s = re.sub(r"(?i)</a>", ", ", s)
s = re.sub(r"<[^>]+>", " ", s)
s = re.sub(r"\s+", " ", s)
return s.strip()
def _strip_html_to_lines(raw: str) -> List[str]:
"""Convert a small HTML snippet to a list of meaningful text lines.
Unlike `_strip_html_to_text`, this preserves `<br>` as line breaks so we can
parse LibGen ads.php tag blocks that use `<br>` separators.
"""
s = html_std.unescape(str(raw or ""))
s = re.sub(r"(?is)<script\b.*?</script>", " ", s)
s = re.sub(r"(?is)<style\b.*?</style>", " ", s)
s = re.sub(r"(?i)<br\s*/?>", "\n", s)
s = re.sub(r"(?i)</p\s*>", "\n", s)
s = re.sub(r"(?i)</tr\s*>", "\n", s)
# Help keep link-heavy lists readable.
s = re.sub(r"(?i)</a>", ", ", s)
s = re.sub(r"<[^>]+>", " ", s)
s = s.replace("\r\n", "\n").replace("\r", "\n")
out: List[str] = []
for line in s.split("\n"):
t = re.sub(r"\s+", " ", str(line or "")).strip()
if t:
out.append(t)
return out
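# A minimal behavior sketch (hypothetical snippet, not from a live mirror):
# `<br>` separators become distinct lines and runs of whitespace collapse.
def _example_strip_html_to_lines() -> None:
    snippet = "title:  Example Book<br>author(s): A. Writer<br>isbn: 9780306406157"
    assert _strip_html_to_lines(snippet) == [
        "title: Example Book",
        "author(s): A. Writer",
        "isbn: 9780306406157",
    ]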
def _libgen_md5_from_url(url: str) -> str:
try:
p = urlparse(str(url or ""))
q = p.query or ""
except Exception:
q = ""
m = re.search(r"(?:^|[&?])md5=([a-fA-F0-9]{32})(?:&|$)", q)
return str(m.group(1)).lower() if m else ""
def _libgen_ads_url_for_target(url: str) -> str:
"""Best-effort conversion of any LibGen URL to an ads.php URL (same host).
If md5 is not present, returns empty string.
"""
md5 = _libgen_md5_from_url(url)
if not md5:
return ""
try:
p = urlparse(str(url or ""))
scheme = p.scheme or "https"
netloc = p.netloc
if not netloc:
return ""
return f"{scheme}://{netloc}/ads.php?md5={md5}"
except Exception:
return ""
def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
"""Parse tags embedded on LibGen ads.php pages.
Some mirrors render all metadata as a single `<td>` with `<br>` separators:
title: ...<br>author(s): ...<br>isbn: ...
Returns a metadata dict similar to `_parse_libgen_details_html` (subset), plus
`_raw_fields` with captured keys.
"""
s = str(html or "")
td_blocks = re.findall(r"(?is)<td\b[^>]*>(.*?)</td>", s)
best_lines: List[str] = []
best_score = 0
for td in td_blocks:
lines = _strip_html_to_lines(td)
if not lines:
continue
score = 0
for ln in lines:
lo = ln.lower()
if ":" in ln and any(k in lo for k in ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")):
score += 1
if score > best_score:
best_score = score
best_lines = lines
# Fallback: treat the entire page as a line list.
if not best_lines:
best_lines = _strip_html_to_lines(s)
raw_fields: Dict[str, str] = {}
pending_key: Optional[str] = None
def _norm_key(k: str) -> str:
kk = str(k or "").strip().lower()
kk = re.sub(r"\s+", " ", kk)
if kk in {"authors", "author(s)", "author(s).", "author(s):"}:
return "author"
if kk in {"tag", "tags"}:
return "tags"
return kk
for ln in best_lines:
line = str(ln or "").strip()
if not line:
continue
if ":" in line:
k, v = line.split(":", 1)
k = _norm_key(k)
v = str(v or "").strip()
if v:
raw_fields[k] = v
pending_key = None
else:
pending_key = k
continue
# Continuation line: if the previous key had no inline value, use this.
if pending_key:
raw_fields[pending_key] = line
pending_key = None
out: Dict[str, Any] = {"_raw_fields": dict(raw_fields)}
title = str(raw_fields.get("title") or "").strip()
if title:
out["title"] = title
publisher = str(raw_fields.get("publisher") or "").strip()
if publisher:
out["publisher"] = publisher
year = str(raw_fields.get("year") or "").strip()
if year:
out["year"] = year
language = str(raw_fields.get("language") or "").strip()
if language:
out["language"] = language
authors_raw = str(raw_fields.get("author") or "").strip()
if authors_raw:
out["authors"] = _split_listish_text(authors_raw)
# ISBN: extract all tokens (some pages include multiple).
isbn_raw = str(raw_fields.get("isbn") or "").strip()
if isbn_raw:
isbns = _extract_isbns(isbn_raw)
if isbns:
out["isbn"] = isbns
tags_raw = str(raw_fields.get("tags") or "").strip()
if tags_raw:
# Keep these as freeform tags (split on commas/semicolons/pipes).
out["tags"] = _split_listish_text(tags_raw)
return out
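# A small worked example with hypothetical markup in the single-<td> shape
# described above; keys are normalized ("author(s)" -> "author") and listish
# values are split on commas/semicolons/pipes.
def _example_parse_ads_tags() -> None:
    page = (
        "<table><tr><td>"
        "title: Example Book<br>author(s): A. Writer; B. Author<br>"
        "publisher: Example Press<br>year: 2001<br>isbn: 9780306406157"
        "</td></tr></table>"
    )
    meta = _parse_libgen_ads_tags_html(page)
    assert meta["title"] == "Example Book"
    assert meta["authors"] == ["A. Writer", "B. Author"]
    assert meta["year"] == "2001"
    assert meta["isbn"] == ["9780306406157"]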
def _extract_anchor_texts(raw_html: str) -> List[str]:
out: List[str] = []
for m in re.finditer(r"(?is)<a\b[^>]*>(.*?)</a>", str(raw_html or "")):
t = _strip_html_to_text(m.group(1))
if t:
out.append(t)
# De-dupe, preserve order
seen: set[str] = set()
uniq: List[str] = []
for x in out:
k = x.strip()
if not k:
continue
if k.lower() in seen:
continue
seen.add(k.lower())
uniq.append(k)
return uniq
def _split_listish_text(value: str) -> List[str]:
s = str(value or "").strip()
if not s:
return []
parts = re.split(r"\s*(?:,|;|\|)\s*", s)
out: List[str] = []
for p in parts:
p = str(p or "").strip()
if p:
out.append(p)
return out
def _extract_isbns(text: str) -> List[str]:
s = str(text or "")
candidates = re.findall(r"\b[0-9Xx][0-9Xx\-\s]{8,20}[0-9Xx]\b", s)
out: List[str] = []
for c in candidates:
n = re.sub(r"[^0-9Xx]", "", c).upper()
if len(n) not in (10, 13):
continue
if n not in out:
out.append(n)
# Also handle already-clean tokens.
for c in re.findall(r"\b(?:97[89])?\d{9}[\dXx]\b", s):
n = str(c).upper()
if n not in out:
out.append(n)
return out
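# Behavior sketch (hypothetical input): hyphenated or spaced candidates are
# normalized and length-filtered to 10/13 characters; bare digit runs are
# caught by the second pass.
def _example_extract_isbns() -> None:
    text = "ISBN 978-0-306-40615-7 (pbk.), also 0306406152"
    assert _extract_isbns(text) == ["9780306406157", "0306406152"]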
def _libgen_id_from_url(url: str) -> str:
# Handles edition.php?id=..., file.php?id=...
m = re.search(r"(?:\?|&)id=(\d+)", str(url or ""), flags=re.IGNORECASE)
return str(m.group(1)) if m else ""
def _prefer_isbn(isbns: List[str]) -> str:
vals = [str(x or "").strip() for x in (isbns or []) if str(x or "").strip()]
# Prefer ISBN-13, then ISBN-10.
for v in vals:
if len(v) == 13:
return v
for v in vals:
if len(v) == 10:
return v
return vals[0] if vals else ""
def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] = None) -> Tuple[List[str], str]:
"""Return (tags, source_name) for the given ISBN.
Priority:
1) OpenLibrary Books API lookup by ISBN (fast, structured)
2) isbnsearch.org scrape via MetadataProvider
"""
isbn_clean = re.sub(r"[^0-9Xx]", "", str(isbn or "")).upper()
if len(isbn_clean) not in (10, 13):
return [], ""
# 1) OpenLibrary API lookup by ISBN (short timeout, silent failure).
try:
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
resp = requests.get(url, timeout=4)
resp.raise_for_status()
data = resp.json()
if isinstance(data, dict) and data:
book_data = next(iter(data.values()), None)
else:
book_data = None
if isinstance(book_data, dict):
tags: List[str] = []
def _add(t: str) -> None:
s = str(t or "").strip()
if s:
tags.append(s)
if book_data.get("title"):
_add(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for a in authors[:3]:
if isinstance(a, dict) and a.get("name"):
_add(f"author:{a['name']}")
if book_data.get("publish_date"):
_add(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub0 = publishers[0]
if isinstance(pub0, dict) and pub0.get("name"):
_add(f"publisher:{pub0['name']}")
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
_add(f"description:{desc_str[:200]}")
pages = book_data.get("number_of_pages")
if isinstance(pages, int) and pages > 0:
_add(f"pages:{pages}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
_add(f"{ns}:{val}")
if not any(str(t).lower().startswith("isbn:") for t in tags):
tags.insert(0, f"isbn:{isbn_clean}")
# De-dupe case-insensitively, preserve order.
seen: set[str] = set()
out: List[str] = []
for t in tags:
k = str(t).strip().lower()
if not k or k in seen:
continue
seen.add(k)
out.append(str(t).strip())
if out:
return out, "openlibrary"
except Exception:
pass
# 2) isbnsearch metadata provider fallback.
try:
from Provider.metadata_provider import get_metadata_provider
provider = get_metadata_provider("isbnsearch", config or {})
if provider is None:
return [], ""
items = provider.search(isbn_clean, limit=1)
if not items:
return [], ""
tags = provider.to_tags(items[0])
if not any(str(t).lower().startswith("isbn:") for t in tags):
tags = [f"isbn:{isbn_clean}"] + [str(t) for t in tags]
return [str(t) for t in tags if str(t).strip()], provider.name
except Exception:
return [], ""
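# Network-dependent usage sketch (hypothetical ISBN): the function returns
# ([], "") for malformed ISBNs or when both lookups fail, so callers can treat
# enrichment as strictly optional.
def _example_enrich_book_tags() -> None:
    tags, source = _enrich_book_tags_from_isbn("978-0-306-40615-7")
    if source:  # "openlibrary" or the fallback provider's name
        assert any(t.lower().startswith("isbn:") for t in tags)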
def _fetch_libgen_details_html(url: str, *, timeout: Optional[Tuple[float, float]] = None) -> Optional[str]:
try:
if timeout is None:
timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
session = requests.Session()
session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
}
)
with session.get(str(url), stream=True, timeout=timeout) as resp:
resp.raise_for_status()
ct = str(resp.headers.get("Content-Type", "")).lower()
if "text/html" not in ct:
return None
return resp.text
except Exception:
return None
def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
"""Parse LibGen details-page HTML (edition.php/file.php) into a metadata dict.
Best-effort and intentionally tolerant of mirror variations.
"""
out: Dict[str, Any] = {}
raw_fields: Dict[str, str] = {}
s = str(html or "")
# Fast path: try to pull simple Label/Value table rows.
for m in re.finditer(
r"(?is)<tr\b[^>]*>\s*<t[dh]\b[^>]*>\s*([^<]{1,80}?)\s*:??\s*</t[dh]>\s*<t[dh]\b[^>]*>(.*?)</t[dh]>\s*</tr>",
s,
):
label = _strip_html_to_text(m.group(1))
raw_val_html = str(m.group(2) or "")
if not label:
continue
val_text = _strip_html_to_text(raw_val_html)
if not val_text:
continue
raw_fields[label] = val_text
norm = re.sub(r"[^a-z0-9]+", "_", label.strip().lower()).strip("_")
if not norm:
continue
# Prefer anchors for multi-valued fields.
anchors = _extract_anchor_texts(raw_val_html)
if anchors:
out[norm] = anchors
else:
out[norm] = val_text
# Some libgen.gl edition pages group metadata as repeated blocks like:
# <strong>Title:</strong>
# The Title
# We'll parse those too (best-effort, no DOM required).
strong_matches = list(re.finditer(r"(?is)<strong\b[^>]*>(.*?)</strong>", s))
if strong_matches:
for idx, m in enumerate(strong_matches):
label_raw = _strip_html_to_text(m.group(1))
label = str(label_raw or "").strip()
if not label:
continue
# Normalize label (strip trailing colon if present).
if label.endswith(":"):
label = label[:-1].strip()
chunk_start = m.end()
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
raw_val_html = s[chunk_start:chunk_end]
# If we already have a value for this label from a table row, keep it.
if label in raw_fields:
continue
val_text = _strip_html_to_text(raw_val_html)
if not val_text:
continue
raw_fields[label] = val_text
norm = re.sub(r"[^a-z0-9]+", "_", label.strip().lower()).strip("_")
if not norm:
continue
anchors = _extract_anchor_texts(raw_val_html)
if anchors:
out[norm] = anchors
else:
out[norm] = val_text
# Normalize keys of interest.
def _first_str(v: Any) -> str:
if isinstance(v, list) and v:
return str(v[0] or "").strip()
return str(v or "").strip()
title = _first_str(out.get("title"))
if title:
out["title"] = title
authors = out.get("author_s") or out.get("authors") or out.get("author")
if isinstance(authors, str):
authors_list = _split_listish_text(authors)
elif isinstance(authors, list):
authors_list = [str(x).strip() for x in authors if str(x).strip()]
else:
authors_list = []
if authors_list:
out["authors"] = authors_list
publisher = _first_str(out.get("publisher"))
if publisher:
out["publisher"] = publisher
year = _first_str(out.get("year"))
if year:
out["year"] = year
language = _first_str(out.get("language"))
if language:
out["language"] = language
oclc = _first_str(out.get("oclc_worldcat")) or _first_str(out.get("oclc"))
if oclc:
m_oclc = re.search(r"\b\d{5,15}\b", oclc)
out["oclc"] = str(m_oclc.group(0)) if m_oclc else oclc
tags_val = out.get("tags")
if isinstance(tags_val, list):
tags_list = [str(x).strip() for x in tags_val if str(x).strip()]
elif isinstance(tags_val, str):
tags_list = _split_listish_text(tags_val)
else:
tags_list = []
if tags_list:
out["tags"] = tags_list
isbn_val = out.get("isbn")
isbn_text = ""
if isinstance(isbn_val, list):
isbn_text = " ".join([str(x) for x in isbn_val if x])
else:
isbn_text = str(isbn_val or "")
isbns = _extract_isbns(isbn_text)
if isbns:
out["isbn"] = isbns
edition_id = _first_str(out.get("edition_id"))
if edition_id:
m_eid = re.search(r"\b\d+\b", edition_id)
out["edition_id"] = str(m_eid.group(0)) if m_eid else edition_id
if raw_fields:
out["_raw_fields"] = raw_fields
return out
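# A worked example over a hypothetical label/value table layout; anchor text is
# preferred for multi-valued cells, and ISBNs are normalized.
def _example_parse_details() -> None:
    page = (
        "<table>"
        "<tr><td>Title:</td><td>Example Book</td></tr>"
        "<tr><td>Author(s):</td><td><a href='/a/1'>A. Writer</a></td></tr>"
        "<tr><td>ISBN:</td><td>978-0-306-40615-7</td></tr>"
        "</table>"
    )
    meta = _parse_libgen_details_html(page)
    assert meta["title"] == "Example Book"
    assert meta["authors"] == ["A. Writer"]
    assert meta["isbn"] == ["9780306406157"]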
def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
tags: List[str] = []
seen: set[str] = set()
def _add(t: str) -> None:
s = str(t or "").strip()
if not s:
return
k = s.lower()
if k in seen:
return
seen.add(k)
tags.append(s)
title = str(meta.get("title") or "").strip()
if title:
_add(f"title:{title}")
for a in meta.get("authors") or []:
a = str(a or "").strip()
if a:
_add(f"author:{a}")
publisher = str(meta.get("publisher") or "").strip()
if publisher:
_add(f"publisher:{publisher}")
year = str(meta.get("year") or "").strip()
if year:
_add(f"year:{year}")
language = str(meta.get("language") or "").strip()
if language:
_add(f"language:{language}")
for isbn in meta.get("isbn") or []:
isbn = str(isbn or "").strip().replace("-", "")
if isbn:
_add(f"isbn:{isbn}")
oclc = str(meta.get("oclc") or "").strip()
if oclc:
_add(f"oclc:{oclc}")
edition_id = str(meta.get("edition_id") or "").strip()
if edition_id:
_add(f"libgen_edition_id:{edition_id}")
# Freeform tags (no "tags:" prefix).
for t in meta.get("tags") or []:
t = str(t or "").strip()
if t:
_add(t)
# Any additional structured fields we captured are preserved under a libgen_ namespace.
raw_fields = meta.get("_raw_fields")
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author(s)", "authors", "author", "publisher", "year", "isbn", "language", "oclc/worldcat", "tags", "edition id"}:
continue
vv = str(v or "").strip()
if not vv:
continue
ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_")
if ns:
_add(f"libgen_{ns}:{vv}")
return tags
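# Mapping sketch with a hypothetical metadata dict: namespaced tags come out in
# a stable order and ISBNs lose their hyphens.
def _example_metadata_to_tags() -> None:
    meta = {
        "title": "Example Book",
        "authors": ["A. Writer"],
        "year": "2001",
        "isbn": ["978-0-306-40615-7"],
    }
    assert _libgen_metadata_to_tags(meta) == [
        "title:Example Book",
        "author:A. Writer",
        "year:2001",
        "isbn:9780306406157",
    ]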
class Libgen(Provider):
# Domains that should be routed to this provider when the user supplies a URL.
# (Used by ProviderCore.registry.match_provider_name_for_url)
@@ -146,6 +736,15 @@ class Libgen(Provider):
md5 = str(md.get("md5") or "").strip()
extension = str(md.get("extension") or "").strip().lstrip(".")
# If the user passed ads.php/get.php directly, capture md5 from the URL so
# filenames are stable (avoid always writing `libgen.pdf`).
if (not md5) and isinstance(target, str) and target.startswith("http"):
md5 = _libgen_md5_from_url(target)
if md5:
md["md5"] = md5
# Defer LibGen details-page metadata and ISBN enrichment until AFTER the file is downloaded.
if (not target) or target.startswith("libgen:"):
if md5 and re.fullmatch(r"[a-fA-F0-9]{32}", md5):
target = urljoin(MIRRORS[0], f"/ads.php?md5={md5}")
@@ -153,7 +752,10 @@ class Libgen(Provider):
if not target:
return None
if title and title.startswith("http"):
    title = ""
base_name = sanitize_filename(title or md5 or (f"libgen_{_libgen_id_from_url(target)}" if _libgen_id_from_url(target) else "libgen"))
out_path = output_dir / base_name
if extension:
out_path = out_path.with_suffix(f".{extension}")
@@ -190,6 +792,122 @@ class Libgen(Provider):
ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback)
progress_bar.finish()
if ok and final_path:
# After the download completes, best-effort fetch details metadata (title + ISBN)
# and then enrich tags via OpenLibrary/isbnsearch. This ensures enrichment never
# blocks the download itself.
try:
if isinstance(target, str) and target.startswith("http"):
low = target.lower()
# Preferred: ads.php pages often embed a complete tag block.
# Parse it post-download (best-effort) and do NOT perform external
# enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
if ("/ads.php" in low) or ("/get.php" in low):
ads_url = target if "/ads.php" in low else _libgen_ads_url_for_target(target)
if ads_url:
html = _fetch_libgen_details_html(ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0))
if html:
meta = _parse_libgen_ads_tags_html(html)
extracted_title = str(meta.get("title") or "").strip()
if extracted_title:
md["title"] = extracted_title
result.tag.add(f"title:{extracted_title}")
if (not title) or title.startswith("http"):
title = extracted_title
authors = meta.get("authors") if isinstance(meta.get("authors"), list) else []
for a in (authors or []):
aa = str(a or "").strip()
if aa:
result.tag.add(f"author:{aa}")
publisher = str(meta.get("publisher") or "").strip()
if publisher:
md["publisher"] = publisher
result.tag.add(f"publisher:{publisher}")
year = str(meta.get("year") or "").strip()
if year:
md["year"] = year
result.tag.add(f"year:{year}")
language = str(meta.get("language") or "").strip()
if language:
md["language"] = language
result.tag.add(f"language:{language}")
isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
isbns = [str(x).strip() for x in (isbns or []) if str(x).strip()]
if isbns:
md["isbn"] = isbns
for isbn_val in isbns:
result.tag.add(f"isbn:{isbn_val}")
free_tags = meta.get("tags") if isinstance(meta.get("tags"), list) else []
for t in (free_tags or []):
tt = str(t or "").strip()
if tt:
result.tag.add(tt)
# Preserve any other extracted fields (namespaced).
raw_fields = meta.get("_raw_fields")
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author", "authors", "publisher", "year", "isbn", "language", "tags"}:
continue
vv = str(v or "").strip()
if not vv:
continue
ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_")
if ns:
result.tag.add(f"libgen_{ns}:{vv}")
# Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
if ("/edition.php" in low) or ("/file.php" in low) or ("/series.php" in low):
html = _fetch_libgen_details_html(target)
if html:
meta = _parse_libgen_details_html(html)
if not meta.get("edition_id"):
eid = _libgen_id_from_url(target)
if eid:
meta["edition_id"] = eid
extracted_title = str(meta.get("title") or "").strip()
extracted_isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
extracted_isbns = [str(x).strip() for x in (extracted_isbns or []) if str(x).strip()]
if extracted_title:
md["title"] = extracted_title
result.tag.add(f"title:{extracted_title}")
if extracted_isbns:
md["isbn"] = extracted_isbns
for isbn_val in extracted_isbns:
isbn_norm = str(isbn_val).strip().replace("-", "")
if isbn_norm:
result.tag.add(f"isbn:{isbn_norm}")
if meta.get("edition_id"):
md["edition_id"] = str(meta.get("edition_id"))
preferred_isbn = _prefer_isbn(extracted_isbns)
if preferred_isbn:
enriched_tags, enriched_source = _enrich_book_tags_from_isbn(
preferred_isbn,
config=getattr(self, "config", None),
)
if enriched_tags:
try:
result.tag.update(set(enriched_tags))
except Exception:
pass
if enriched_source:
md["metadata_enriched_from"] = enriched_source
if extracted_title and ((not title) or title.startswith("http")):
title = extracted_title
except Exception:
pass
return Path(final_path)
return None
except Exception:
@@ -751,6 +1469,34 @@ def _resolve_download_url(
if not html:
return None
# LibGen chain helpers (for environments without lxml).
# Typical chain:
# edition.php?id=... -> file.php?id=...
# file.php?id=... -> ads.php?md5=... (or get.php?md5=...)
# ads.php?md5=... -> get.php?md5=...
# get.php?md5=... -> file response
# Handle edition -> file links.
m = re.search(r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle series -> edition links.
m = re.search(r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle file -> ads/get links (sometimes present as the "Libgen" mirror).
m = re.search(r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Prefer explicit get.php md5 links (most common successful chain).
m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
if m:


@@ -2,6 +2,8 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Type, cast
import html as html_std
import re
import requests
import sys
import json
@@ -279,6 +281,199 @@ class GoogleBooksMetadataProvider(MetadataProvider):
return tags
class ISBNsearchMetadataProvider(MetadataProvider):
"""Metadata provider that scrapes isbnsearch.org by ISBN.
This is a best-effort HTML scrape. It expects the query to be an ISBN.
"""
@property
def name(self) -> str: # type: ignore[override]
return "isbnsearch"
@staticmethod
def _strip_html_to_text(raw: str) -> str:
s = html_std.unescape(str(raw or ""))
s = re.sub(r"(?i)<br\s*/?>", "\n", s)
s = re.sub(r"<[^>]+>", " ", s)
s = re.sub(r"\s+", " ", s)
return s.strip()
@staticmethod
def _clean_isbn(query: str) -> str:
s = str(query or "").strip()
if not s:
return ""
s = s.replace("isbn:", "").replace("ISBN:", "")
s = re.sub(r"[^0-9Xx]", "", s).upper()
if len(s) in (10, 13):
return s
# Try to locate an ISBN-like token inside the query.
m = re.search(r"\b(?:97[89])?\d{9}[\dXx]\b", s)
return str(m.group(0)).upper() if m else ""
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
_ = limit
isbn = self._clean_isbn(query)
if not isbn:
return []
url = f"https://isbnsearch.org/isbn/{isbn}"
try:
resp = requests.get(url, timeout=10)
resp.raise_for_status()
html = str(resp.text or "")
if not html:
return []
except Exception as exc:
log(f"ISBNsearch scrape failed: {exc}", file=sys.stderr)
return []
title = ""
m_title = re.search(r"(?is)<h1\b[^>]*>(.*?)</h1>", html)
if m_title:
title = self._strip_html_to_text(m_title.group(1))
raw_fields: Dict[str, str] = {}
strong_matches = list(re.finditer(r"(?is)<strong\b[^>]*>(.*?)</strong>", html))
for idx, m in enumerate(strong_matches):
label_raw = self._strip_html_to_text(m.group(1))
label = str(label_raw or "").strip()
if not label:
continue
if label.endswith(":"):
label = label[:-1].strip()
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
if m_end:
chunk = chunk[: m_end.start()]
val_text = self._strip_html_to_text(chunk)
if not val_text:
continue
raw_fields[label] = val_text
def _get(*labels: str) -> str:
for lab in labels:
for k, v in raw_fields.items():
if str(k).strip().lower() == str(lab).strip().lower():
return str(v or "").strip()
return ""
# Map common ISBNsearch labels.
author_text = _get("Author", "Authors", "Author(s)")
publisher = _get("Publisher")
published = _get("Published", "Publication Date", "Publish Date")
language = _get("Language")
pages = _get("Pages")
isbn_13 = _get("ISBN-13", "ISBN13")
isbn_10 = _get("ISBN-10", "ISBN10")
year = ""
if published:
m_year = re.search(r"\b(\d{4})\b", published)
year = str(m_year.group(1)) if m_year else ""
authors: List[str] = []
if author_text:
# Split on common separators; keep multi-part names intact.
for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*", author_text, flags=re.IGNORECASE):
p = str(part or "").strip()
if p:
authors.append(p)
# Prefer parsed title, but fall back to og:title if needed.
if not title:
m_og = re.search(r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>", html)
if m_og:
title = self._strip_html_to_text(m_og.group(1))
# Ensure ISBN tokens are normalized.
isbn_tokens: List[str] = []
for token in [isbn_13, isbn_10, isbn]:
t = self._clean_isbn(token)
if t and t not in isbn_tokens:
isbn_tokens.append(t)
item: Dict[str, Any] = {
"title": title or "",
# Keep UI columns compatible with the generic metadata table.
"artist": ", ".join(authors) if authors else "",
"album": publisher or "",
"year": year or "",
"provider": self.name,
"authors": authors,
"publisher": publisher or "",
"language": language or "",
"pages": pages or "",
"identifiers": {
"isbn_13": next((t for t in isbn_tokens if len(t) == 13), None),
"isbn_10": next((t for t in isbn_tokens if len(t) == 10), None),
},
"raw_fields": raw_fields,
}
# Only return usable items.
if not item.get("title") and not any(item["identifiers"].values()):
return []
return [item]
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags: List[str] = []
title = str(item.get("title") or "").strip()
if title:
tags.append(f"title:{title}")
authors = item.get("authors") or []
if isinstance(authors, list):
for a in authors:
a = str(a or "").strip()
if a:
tags.append(f"author:{a}")
publisher = str(item.get("publisher") or "").strip()
if publisher:
tags.append(f"publisher:{publisher}")
year = str(item.get("year") or "").strip()
if year:
tags.append(f"year:{year}")
language = str(item.get("language") or "").strip()
if language:
tags.append(f"language:{language}")
identifiers = item.get("identifiers") or {}
if isinstance(identifiers, dict):
for key in ("isbn_13", "isbn_10"):
val = identifiers.get(key)
if val:
tags.append(f"isbn:{val}")
tags.append(f"source:{self.name}")
# Dedup case-insensitively, preserve order.
seen: set[str] = set()
out: List[str] = []
for t in tags:
s = str(t or "").strip()
if not s:
continue
k = s.lower()
if k in seen:
continue
seen.add(k)
out.append(s)
return out
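# Network-dependent usage sketch (hypothetical ISBN), assuming the registry
# exposes get_metadata_provider in this module as used by the Libgen provider:
def _example_isbnsearch_lookup() -> None:
    provider = get_metadata_provider("isbnsearch", {})
    if provider is not None:
        items = provider.search("9780306406157", limit=1)
        if items:
            # e.g. ["title:...", "isbn:9780306406157", "source:isbnsearch"]
            print(provider.to_tags(items[0]))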
class MusicBrainzMetadataProvider(MetadataProvider):
"""Metadata provider for MusicBrainz recordings."""
@@ -517,6 +712,7 @@ _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}


@@ -190,6 +190,7 @@ def _archive_id_from_url(url: str) -> str:
u = str(url or "").strip()
if not u:
return ""
try:
p = urlparse(u)
host = (p.hostname or "").lower().strip()
@@ -215,6 +216,175 @@ def _archive_id_from_url(url: str) -> str:
return ""
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
return []
if isinstance(value, list):
out: List[str] = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
if isinstance(value, (tuple, set)):
out = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
try:
s = str(value).strip()
except Exception:
return []
return [s] if s else []
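# Behavior sketch: scalars become one-element lists, sequences are flattened to
# stripped strings, and empty values drop out.
def _example_coerce_field() -> None:
    assert _coerce_archive_field_list(None) == []
    assert _coerce_archive_field_list("Example Press") == ["Example Press"]
    assert _coerce_archive_field_list(["Fiction", "", "  History "]) == ["Fiction", "History"]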
def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]:
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.
This is intentionally best-effort and conservative: it focuses on stable,
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
"""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
tags: List[str] = []
seen: set[str] = set()
def _add(tag: str) -> None:
try:
t = str(tag).strip()
except Exception:
return
if not t:
return
if t.lower() in seen:
return
seen.add(t.lower())
tags.append(t)
if archive_id_clean:
_add(f"internet_archive:{archive_id_clean}")
# Title
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
_add(f"title:{title}")
# Authors/creators
creators: List[str] = []
creators.extend(_coerce_archive_field_list(meta.get("creator")))
creators.extend(_coerce_archive_field_list(meta.get("author")))
for creator in creators[:3]:
_add(f"author:{creator}")
# Publisher
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
_add(f"publisher:{publisher}")
# Publish date/year
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
_add(f"publish_date:{date_val}")
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
_add(f"publish_date:{year_val}")
# Language
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
_add(f"language:{lang}")
# Topics/subjects: follow existing OpenLibrary behavior (un-namespaced tags)
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
if len(subj) > 200:
subj = subj[:200]
_add(subj)
# ISBNs and identifiers
def _clean_isbn(raw: str) -> str:
return str(raw or "").replace("-", "").strip()
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
isbn_clean = _clean_isbn(isbn)
if isbn_clean:
_add(f"isbn:{isbn_clean}")
identifiers: List[str] = []
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
added_other = 0
for ident in identifiers:
ident_s = str(ident or "").strip()
if not ident_s:
continue
low = ident_s.lower()
if low.startswith("urn:isbn:"):
val = _clean_isbn(ident_s.split(":", 2)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("isbn:"):
val = _clean_isbn(ident_s.split(":", 1)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("urn:oclc:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("oclc:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("urn:lccn:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("lccn:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("doi:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"doi:{val}")
continue
if archive_id_clean and low == archive_id_clean.lower():
continue
if added_other >= 5:
continue
if len(ident_s) > 200:
ident_s = ident_s[:200]
_add(f"identifier:{ident_s}")
added_other += 1
return tags
def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
resp.raise_for_status()
data = resp.json() if resp is not None else {}
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
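# End-to-end sketch with a hypothetical item id and metadata object; the live
# variant would be _archive_item_metadata_to_tags(i, _fetch_archive_item_metadata(i)).
def _example_archive_tags() -> None:
    meta = {
        "title": "Example Book",
        "creator": "A. Writer",
        "subject": ["History"],
        "isbn": ["978-0-306-40615-7"],
        "identifier": ["urn:oclc:12345678"],
    }
    assert _archive_item_metadata_to_tags("examplebook", meta) == [
        "internet_archive:examplebook",
        "title:Example Book",
        "author:A. Writer",
        "History",
        "isbn:9780306406157",
        "oclc:12345678",
    ]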
class OpenLibrary(Provider):
# Domains that should be routed to this provider when the user supplies a URL.
# (Used by ProviderCore.registry.match_provider_name_for_url)
@@ -1046,6 +1216,25 @@ class OpenLibrary(Provider):
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
return None
# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
try:
archive_meta = _fetch_archive_item_metadata(archive_id)
tags = _archive_item_metadata_to_tags(archive_id, archive_meta)
if tags:
try:
result.tag.update(tags)
except Exception:
# Fallback for callers that pass plain dicts.
pass
if isinstance(meta, dict):
meta["archive_id"] = archive_id
if archive_meta:
meta["archive_metadata"] = archive_meta
result.full_metadata = meta
except Exception:
# Never block downloads on metadata fetch.
pass
safe_title = sanitize_filename(result.title)
if not safe_title or "http" in safe_title.lower():
safe_title = sanitize_filename(archive_id) or "archive"


@@ -306,36 +306,41 @@ class Soulseek(Provider):
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
    async with _suppress_aioslsk_asyncio_task_noise():
        try:
            await client.start()
            await client.login()
        except Exception as exc:
            log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr)
            return []
        try:
            search_request = await client.searches.search(query)
            await self._collect_results(search_request, timeout=timeout)
            return self._flatten_results(search_request)[:limit]
        except Exception as exc:
            log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr)
            return []
        finally:
            # Best-effort: try to cancel/close the search request before stopping
            # the client to reduce stray reply spam.
            try:
                if "search_request" in locals() and search_request is not None:
                    cancel = getattr(search_request, "cancel", None)
                    if callable(cancel):
                        maybe = cancel()
                        if asyncio.iscoroutine(maybe):
                            await maybe
            except Exception:
                pass
            try:
                await client.stop()
            except Exception:
                pass
            try:
                await asyncio.sleep(0)
            except Exception:
                pass
def _flatten_results(self, search_request: Any) -> List[dict]:
flat: List[dict] = []
@@ -558,93 +563,101 @@ async def download_soulseek_file(
async def _attempt_once(attempt_num: int) -> tuple[Optional[Path], Any, int, float]:
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
    try:
        async with _suppress_aioslsk_asyncio_task_noise():
            await client.start()
            await client.login()
            debug(f"[soulseek] Logged in as {login_user}")
            log(
                f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}",
                file=sys.stderr,
            )
            debug(f"[soulseek] Requesting download from {username}: {filename}")
            transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD))
            transfer.local_path = str(output_path)
            await client.transfers.queue(transfer)
            start_time = time.time()
            last_progress_time = start_time
            progress_bar = ProgressBar()
            while not transfer.is_finalized():
                elapsed = time.time() - start_time
                if elapsed > timeout:
                    log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr)
                    bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
                    state_val = getattr(getattr(transfer, "state", None), "VALUE", None)
                    progress_bar.finish()
                    return None, state_val, bytes_done, elapsed
                bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
                total_bytes = int(getattr(transfer, "filesize", 0) or 0)
                now = time.time()
                if now - last_progress_time >= 0.5:
                    progress_bar.update(
                        downloaded=bytes_done,
                        total=total_bytes if total_bytes > 0 else None,
                        label="download",
                        file=sys.stderr,
                    )
                    last_progress_time = now
                await asyncio.sleep(1)
            final_state = getattr(getattr(transfer, "state", None), "VALUE", None)
            downloaded_path = (
                Path(transfer.local_path)
                if getattr(transfer, "local_path", None)
                else output_path
            )
            final_elapsed = time.time() - start_time
            # Clear in-place progress bar.
            progress_bar.finish()
            # If a file was written, treat it as success even if state is odd.
            try:
                if downloaded_path.exists() and downloaded_path.stat().st_size > 0:
                    if final_state != TransferState.COMPLETE:
                        log(
                            f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
                            file=sys.stderr,
                        )
                    return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed
            except Exception:
                pass
            if final_state == TransferState.COMPLETE and downloaded_path.exists():
                debug(f"[soulseek] Download complete: {downloaded_path}")
                return downloaded_path, final_state, int(downloaded_path.stat().st_size), final_elapsed
            fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0)
            fail_total = int(getattr(transfer, "filesize", 0) or 0)
            reason = getattr(transfer, "reason", None)
            log(
                f"[soulseek] Download failed: state={final_state} bytes={fail_bytes}/{fail_total} reason={reason}",
                file=sys.stderr,
            )
            # Clean up 0-byte placeholder.
            try:
                if downloaded_path.exists() and downloaded_path.stat().st_size == 0:
                    downloaded_path.unlink(missing_ok=True)
            except Exception:
                pass
            return None, final_state, fail_bytes, final_elapsed
    finally:
        try:
            await client.stop()
        except Exception:
            pass
        # Let cancellation/cleanup callbacks run while our exception handler is still installed.
        try:
            await asyncio.sleep(0)
        except Exception:
            pass
# Retry a couple times only for fast 0-byte failures (common transient case).
max_attempts = 3