This commit is contained in:
nose
2025-12-05 03:42:57 -08:00
parent 5e4df11dbf
commit 5482ee5586
20 changed files with 911 additions and 223 deletions

View File

@@ -41,7 +41,8 @@ from config import resolve_output_dir
from metadata import (
fetch_openlibrary_metadata_tags,
format_playlist_entry,
extract_ytdlp_tags
extract_ytdlp_tags,
build_book_tags,
)
# ============================================================================
@@ -1499,12 +1500,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
author = metadata.get('author')
isbn_val = metadata.get('isbn')
year_val = metadata.get('year')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title,
'author': author,
'isbn': isbn_val,
'year': year_val,
}
urls_to_download.append(url_entry)
debug(f"[search-result] LibGen: '{title}'")
@@ -1700,12 +1708,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
author = metadata.get('author')
isbn_val = metadata.get('isbn')
year_val = metadata.get('year')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title,
'author': author,
'isbn': isbn_val,
'year': year_val,
}
urls_to_download.append(url_entry)
else:
@@ -2177,6 +2192,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
primary_url = url.get('url')
mirrors_dict = url.get('mirrors', {})
book_id = url.get('book_id', '')
title_val = url.get('title')
author_val = url.get('author')
isbn_val = url.get('isbn')
year_val = url.get('year')
if not primary_url:
debug(f"Skipping libgen entry: no primary URL")
@@ -2219,39 +2238,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
# Use libgen_service's download_from_mirror for proper libgen handling
from helper.libgen_service import download_from_mirror
# Generate filename from book_id and title
safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100]
file_path = final_output_dir / f"{safe_title}_{book_id}.pdf"
progress_bar = models.ProgressBar()
progress_start = time.time()
last_update = [progress_start]
progress_bytes = [0]
progress_total = [0]
def _libgen_progress(downloaded: int, total: int) -> None:
    """Record download progress and print a throttled progress line.

    Passed as ``progress_callback`` to ``download_from_mirror``. Every call
    stores the latest counters in the enclosing scope's ``progress_bytes``
    and ``progress_total`` cells. A formatted line is sent to ``debug`` at
    most once every 0.5 seconds, and only when ``total`` is positive.

    Args:
        downloaded: Bytes received so far.
        total: Expected size in bytes. A value <= 0 suppresses the progress
            line because no percentage or ETA can be derived from it.
    """
    # Always keep the latest counters; the caller reads progress_bytes[0]
    # after the download finishes to report the average speed.
    progress_bytes[0] = downloaded
    progress_total[0] = total

    now = time.time()
    # Skip output when the size is unknown or the last line is too recent.
    if total <= 0 or now - last_update[0] < 0.5:
        return

    # Clamp to 100%: `remaining` below is already guarded against
    # downloaded > total (e.g. an inaccurate advertised size), so the
    # percentage must be guarded the same way.
    percent = min(downloaded / total, 1.0) * 100
    # The lower bound keeps the division safe on the very first callback.
    elapsed = max(now - progress_start, 1e-6)
    speed = downloaded / elapsed
    remaining = max(total - downloaded, 0)
    eta = remaining / speed if speed > 0 else 0

    # Break the ETA (seconds) into HH:MM:SS.
    minutes, seconds = divmod(int(eta), 60)
    hours, minutes = divmod(minutes, 60)
    eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
    speed_str = f"{progress_bar.format_bytes(speed)}/s"

    progress_line = progress_bar.format_progress(
        percent_str=f"{percent:.1f}%",
        downloaded=downloaded,
        total=total,
        speed_str=speed_str,
        eta_str=eta_str,
    )
    debug(f" {progress_line}")
    last_update[0] = now
# Attempt download using libgen's native function
success = download_from_mirror(
success, downloaded_path = download_from_mirror(
mirror_url=mirror_url,
output_path=file_path,
log_info=lambda msg: debug(f" {msg}"),
log_error=lambda msg: debug(f"{msg}")
log_error=lambda msg: debug(f"{msg}"),
progress_callback=_libgen_progress,
)
if success and file_path.exists():
final_path = Path(downloaded_path) if downloaded_path else file_path
if success and final_path.exists():
downloaded = progress_bytes[0] or final_path.stat().st_size
elapsed = time.time() - progress_start
avg_speed = downloaded / elapsed if elapsed > 0 else 0
debug(f" ✓ Downloaded in {elapsed:.1f}s at {progress_bar.format_bytes(avg_speed)}/s")
debug(f" ✓ Downloaded successfully from mirror #{mirror_idx}")
successful_mirror = mirror_url
download_succeeded = True
# Emit result for downstream cmdlets
file_hash = _compute_file_hash(file_path)
emit_tags = ['libgen', 'book']
file_hash = _compute_file_hash(final_path)
emit_tags = build_book_tags(
title=title_val or title,
author=author_val,
isbn=isbn_val,
year=year_val,
source='libgen',
extra=[f"libgen_id:{book_id}"] if book_id else None,
)
pipe_obj = create_pipe_object_result(
source='libgen',
identifier=book_id,
file_path=str(file_path),
file_path=str(final_path),
cmdlet_name='download-data',
file_hash=file_hash,
tags=emit_tags,
source_url=successful_mirror
)
pipeline_context.emit(pipe_obj)
downloaded_files.append(str(file_path))
downloaded_files.append(str(final_path))
exit_code = 0
break # Success, stop trying mirrors
@@ -2643,38 +2705,61 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
# Let's try to get metadata to make a good filename
filename = "libgen_download.bin"
title_from_results = None
author_from_results = None
year_from_results = None
if libgen_id and results:
title = results[0].get("title", "book")
title_from_results = results[0].get("title")
author_from_results = results[0].get("author")
year_from_results = results[0].get("year")
ext = results[0].get("extension", "pdf")
# Sanitize filename
safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
safe_title = "".join(c for c in (title_from_results or "book") if c.isalnum() or c in (' ', '-', '_')).strip()
filename = f"{safe_title}.{ext}"
elif "series.php" in url:
filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
output_path = final_output_dir / filename
if download_from_mirror(url, output_path, log_info=debug, log_error=log):
debug(f"✓ LibGen download successful: {output_path}")
success, downloaded_path = download_from_mirror(
url,
output_path,
log_info=debug,
log_error=log,
)
final_file = Path(downloaded_path) if downloaded_path else output_path
if success and final_file.exists():
debug(f"✓ LibGen download successful: {final_file}")
# Create a result object
info = {
"id": libgen_id or "libgen",
"title": filename,
"webpage_url": url,
"ext": output_path.suffix.lstrip("."),
"ext": final_file.suffix.lstrip("."),
}
emit_tags = build_book_tags(
title=title_from_results or filename,
author=author_from_results,
year=year_from_results,
source="libgen",
extra=[f"libgen_id:{libgen_id}"] if libgen_id else None,
)
file_hash = _compute_file_hash(final_file)
# Emit result
pipeline_context.emit(create_pipe_object_result(
source="libgen",
identifier=libgen_id or "libgen",
file_path=str(output_path),
file_path=str(final_file),
cmdlet_name="download-data",
title=filename,
file_hash=file_hash,
tags=emit_tags,
extra=info
))
downloaded_files.append(str(output_path))
downloaded_files.append(str(final_file))
continue
else:
debug("⚠ LibGen specialized download failed, falling back to generic downloader...")