Add YAPF style + ignore, and format tracked Python files

2025-12-29 18:42:02 -08:00
parent c019c00aed
commit 507946a3e4
108 changed files with 11664 additions and 6494 deletions
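
The commit title says a YAPF style file and ignore file were added, but neither file's diff appears in this excerpt. Judging by how the hunks below split every wrapped comma-separated list one element per line (even short calls such as fmt.get("ext", "")), the committed .style.yapf plausibly contained something like the following sketch; the option names are real YAPF knobs, but their presence and values here are assumptions, not taken from the commit:

[style]
based_on_style = pep8
column_limit = 80
# Assumed: this is the knob that puts each element of a wrapped
# comma-separated list on its own line, as seen throughout the diff.
split_all_comma_separated_values = true

A .yapfignore holds one glob pattern per line for paths YAPF should skip; hypothetical entries might be .venv/**/*.py or build/**/*.py.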

@@ -99,8 +99,11 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
 def list_formats(
-    url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
-) -> Optional[List[Dict[str, Any]]]:
+    url: str,
+    no_playlist: bool = False,
+    playlist_items: Optional[str] = None
+) -> Optional[List[Dict[str,
+                        Any]]]:
     """Get list of available formats for a URL using yt-dlp."""
     _ensure_yt_dlp_ready()
@@ -130,15 +133,21 @@ def list_formats(
     for fmt in formats:
         result_formats.append(
             {
-                "format_id": fmt.get("format_id", ""),
-                "format": fmt.get("format", ""),
-                "ext": fmt.get("ext", ""),
-                "resolution": fmt.get("resolution", ""),
+                "format_id": fmt.get("format_id",
+                                     ""),
+                "format": fmt.get("format",
+                                  ""),
+                "ext": fmt.get("ext",
+                               ""),
+                "resolution": fmt.get("resolution",
+                                      ""),
                 "width": fmt.get("width"),
                 "height": fmt.get("height"),
                 "fps": fmt.get("fps"),
-                "vcodec": fmt.get("vcodec", "none"),
-                "acodec": fmt.get("acodec", "none"),
+                "vcodec": fmt.get("vcodec",
+                                  "none"),
+                "acodec": fmt.get("acodec",
+                                  "none"),
                 "filesize": fmt.get("filesize"),
                 "tbr": fmt.get("tbr"),
             }
@@ -153,8 +162,14 @@ def list_formats(
 def _download_with_sections_via_cli(
-    url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
-) -> tuple[Optional[str], Dict[str, Any]]:
+    url: str,
+    ytdl_options: Dict[str,
+                       Any],
+    sections: List[str],
+    quiet: bool = False
+) -> tuple[Optional[str],
+           Dict[str,
+                Any]]:
     """Download each section separately so merge-file can combine them.
 
     yt-dlp with multiple --download-sections args merges them into one file.
@@ -174,7 +189,8 @@ def _download_with_sections_via_cli(
     # Generate a unique hash-based ID for this download session
     # This ensures different videos/downloads don't have filename collisions
     session_id = hashlib.md5(
-        (url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
+        (url + str(time.time()) + "".join(random.choices(string.ascii_letters,
+                                                         k=10))).encode()
     ).hexdigest()[:12]
 
     first_section_info = None
@@ -207,7 +223,11 @@ def _download_with_sections_via_cli(
         metadata_cmd.append(url)
 
         try:
-            meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
+            meta_result = subprocess.run(
+                metadata_cmd,
+                capture_output=True,
+                text=True
+            )
             if meta_result.returncode == 0 and meta_result.stdout:
                 try:
                     info_dict = json.loads(meta_result.stdout.strip())
@@ -253,7 +273,9 @@ def _download_with_sections_via_cli(
         cmd.append(url)
 
         if not quiet:
-            debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
+            debug(
+                f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}"
+            )
             debug(f"Command: {' '.join(cmd)}")
 
         # Run the subprocess - don't capture output so progress is shown
@@ -280,24 +302,26 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
     # When downloading sections, each section will have .section_N_of_M added by _download_with_sections_via_cli
     outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
 
-    base_options: Dict[str, Any] = {
-        "outtmpl": outtmpl,
-        "quiet": True,
-        "no_warnings": True,
-        "noprogress": True,
-        "socket_timeout": 30,
-        "retries": 10,
-        "fragment_retries": 10,
-        "http_chunk_size": 10_485_760,
-        "restrictfilenames": True,
-        "progress_hooks": [] if opts.quiet else [_progress_callback],
-    }
+    base_options: Dict[str,
+                       Any] = {
+                           "outtmpl": outtmpl,
+                           "quiet": True,
+                           "no_warnings": True,
+                           "noprogress": True,
+                           "socket_timeout": 30,
+                           "retries": 10,
+                           "fragment_retries": 10,
+                           "http_chunk_size": 10_485_760,
+                           "restrictfilenames": True,
+                           "progress_hooks": [] if opts.quiet else [_progress_callback],
+                       }
 
     if opts.cookies_path and opts.cookies_path.is_file():
         base_options["cookiefile"] = str(opts.cookies_path)
     else:
         # Fallback to browser cookies
-        base_options["cookiesfrombrowser"] = ("chrome",)
+        base_options["cookiesfrombrowser"] = ("chrome",
+                                              )
 
     # Add no-playlist option if specified (for single video from playlist url)
     if opts.no_playlist:
@@ -306,7 +330,9 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
     # Configure based on mode
     if opts.mode == "audio":
         base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
-        base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
+        base_options["postprocessors"] = [{
+            "key": "FFmpegExtractAudio"
+        }]
     else:  # video
         base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
         base_options["format_sort"] = [
@@ -396,7 +422,11 @@ def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
         yield output_dir / entry["filename"]
 
 
-def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
+def _resolve_entry_and_path(info: Dict[str,
+                                       Any],
+                            output_dir: Path) -> tuple[Dict[str,
+                                                            Any],
+                                                       Path]:
     """Find downloaded file in yt-dlp metadata."""
     for entry in _iter_download_entries(info):
         for candidate in _candidate_paths(entry, output_dir):
@@ -454,7 +484,10 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
     # LibGen redirects to actual mirrors, follow redirects to get final URL
     session = requests.Session()
     session.headers.update(
-        {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
+        {
+            "User-Agent":
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+        }
     )
 
     debug(f"Following LibGen redirect chain for: {libgen_url}")
@@ -479,30 +512,36 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
                    continue
                href_lower = href.lower()
-               if "get.php" in href_lower or href_lower.endswith(
-                   (".pdf", ".epub", ".djvu", ".mobi")
-               ):
+               if "get.php" in href_lower or href_lower.endswith((".pdf",
+                                                                  ".epub",
+                                                                  ".djvu",
+                                                                  ".mobi")):
                    download_url = (
-                       href if href.startswith("http") else urljoin(final_url, href)
+                       href if href.startswith("http") else
+                       urljoin(final_url,
+                               href)
                    )
                    debug(f"Found download link: {download_url}")
                    return download_url
    else:
        # Regex fallback
        for m in re.finditer(
-           r"href=[\"\']([^\"\']+)[\"\']",
-           response.text or "",
-           flags=re.IGNORECASE,
+               r"href=[\"\']([^\"\']+)[\"\']",
+               response.text or "",
+               flags=re.IGNORECASE,
        ):
            href = str(m.group(1) or "").strip()
            if not href or href.lower().startswith("javascript:"):
                continue
            href_lower = href.lower()
-           if "get.php" in href_lower or href_lower.endswith(
-               (".pdf", ".epub", ".djvu", ".mobi")
-           ):
+           if "get.php" in href_lower or href_lower.endswith((".pdf",
+                                                              ".epub",
+                                                              ".djvu",
+                                                              ".mobi")):
                download_url = (
-                   href if href.startswith("http") else urljoin(final_url, href)
+                   href if href.startswith("http") else
+                   urljoin(final_url,
+                           href)
                )
                debug(f"Found download link: {download_url}")
                return download_url
@@ -616,13 +655,17 @@ def _download_direct_file(
         response = client._request("HEAD", url, follow_redirects=True)
         content_disposition = response.headers.get("content-disposition", "")
         try:
-            content_type = str(response.headers.get("content-type", "") or "").strip().lower()
+            content_type = str(response.headers.get("content-type",
+                                                    "") or "").strip().lower()
         except Exception:
             content_type = ""
 
         if content_disposition:
             # Extract filename from Content-Disposition header
             # Format: attachment; filename="filename.pdf" or filename=filename.pdf
-            match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
+            match = re.search(
+                r'filename\*?=(?:"([^"]*)"|([^;\s]*))',
+                content_disposition
+            )
             if match:
                 extracted_name = match.group(1) or match.group(2)
                 if extracted_name:
@@ -638,7 +681,11 @@ def _download_direct_file(
         # servers block/lie on HEAD, and a URL path like `edition.php` would otherwise
         # be saved as a bogus file.
         try:
-            page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
+            page_like_exts = {".php",
+                              ".asp",
+                              ".aspx",
+                              ".jsp",
+                              ".cgi"}
             ext = ""
             try:
                 ext = Path(str(filename or "")).suffix.lower()
@@ -653,13 +700,14 @@ def _download_direct_file(
                 with client._request_stream("GET", url, follow_redirects=True) as resp:
                     resp.raise_for_status()
                     ct = (
-                        str(resp.headers.get("content-type", "") or "")
-                        .split(";", 1)[0]
-                        .strip()
-                        .lower()
+                        str(resp.headers.get("content-type",
+                                             "") or "").split(";",
+                                                              1)[0].strip().lower()
                     )
                     if ct.startswith("text/html"):
-                        raise DownloadError("URL appears to be an HTML page, not a direct file")
+                        raise DownloadError(
+                            "URL appears to be an HTML page, not a direct file"
+                        )
         except DownloadError:
             raise
         except Exception:
@@ -722,7 +770,8 @@ def _download_direct_file(
         # Prefer pipeline transfer bars when a Live UI is active.
         use_pipeline_transfer = False
         try:
-            if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
+            if pipeline_progress is not None and hasattr(pipeline_progress,
+                                                         "update_transfer"):
                 ui = None
                 if hasattr(pipeline_progress, "ui_and_pipe_index"):
                     ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
@@ -753,15 +802,16 @@ def _download_direct_file(
             try:
                 total_val: Optional[int] = (
                     int(content_length)
-                    if isinstance(content_length, int) and content_length > 0
-                    else None
+                    if isinstance(content_length,
+                                  int) and content_length > 0 else None
                 )
             except Exception:
                 total_val = None
             try:
                 if hasattr(pipeline_progress, "begin_transfer"):
                     pipeline_progress.begin_transfer(
-                        label=str(filename or "download"), total=total_val
+                        label=str(filename or "download"),
+                        total=total_val
                     )
                     transfer_started[0] = True
             except Exception:
@@ -773,16 +823,18 @@ def _download_direct_file(
             # Update pipeline transfer bar when present.
             try:
-                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
+                if pipeline_progress is not None and hasattr(pipeline_progress,
+                                                             "update_transfer"):
                     _maybe_begin_transfer(content_length)
                     total_val: Optional[int] = (
                         int(content_length)
-                        if isinstance(content_length, int) and content_length > 0
-                        else None
+                        if isinstance(content_length,
+                                      int) and content_length > 0 else None
                     )
                     pipeline_progress.update_transfer(
                         label=str(filename or "download"),
-                        completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
+                        completed=int(bytes_downloaded)
+                        if bytes_downloaded is not None else None,
                         total=total_val,
                     )
             except Exception:
@@ -796,12 +848,17 @@ def _download_direct_file(
                 return
             elapsed = now - start_time
-            percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
+            percent = (
+                bytes_downloaded / content_length
+            ) * 100 if content_length > 0 else 0
            speed = bytes_downloaded / elapsed if elapsed > 0 else 0
            eta_str: Optional[str] = None
            if content_length > 0 and speed > 0:
                try:
-                    eta_seconds = max(0.0, float(content_length - bytes_downloaded) / float(speed))
+                    eta_seconds = max(
+                        0.0,
+                        float(content_length - bytes_downloaded) / float(speed)
+                    )
                     minutes, seconds = divmod(int(eta_seconds), 60)
                     hours, minutes = divmod(minutes, 60)
                     eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
@@ -832,11 +889,9 @@ def _download_direct_file(
                 pass
             try:
-                if (
-                    pipeline_progress is not None
-                    and transfer_started[0]
-                    and hasattr(pipeline_progress, "finish_transfer")
-                ):
+                if (pipeline_progress is not None and transfer_started[0]
+                        and hasattr(pipeline_progress,
+                                    "finish_transfer")):
                     pipeline_progress.finish_transfer(label=str(filename or "download"))
             except Exception:
                 pass
@@ -844,8 +899,9 @@ def _download_direct_file(
         try:
             if progress_bar is not None:
                 avg_speed_str = (
-                    progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
-                    + "/s"
+                    progress_bar.
+                    format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) +
+                    "/s"
                 )
             else:
                 avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
@@ -864,7 +920,8 @@ def _download_direct_file(
             ext = ""
 
         info = {
-            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
+            "id": str(filename).rsplit(".",
+                                       1)[0] if "." in str(filename) else str(filename),
            "ext": ext,
            "webpage_url": url,
        }
@@ -897,7 +954,11 @@ def _download_direct_file(
         if debug_logger is not None:
             debug_logger.write_record(
                 "direct-file-downloaded",
-                {"url": url, "path": str(file_path), "hash": hash_value},
+                {
+                    "url": url,
+                    "path": str(file_path),
+                    "hash": hash_value
+                },
             )
 
         return DownloadMediaResult(
@@ -915,11 +976,9 @@ def _download_direct_file(
         except Exception:
             pass
         try:
-            if (
-                pipeline_progress is not None
-                and transfer_started[0]
-                and hasattr(pipeline_progress, "finish_transfer")
-            ):
+            if (pipeline_progress is not None and transfer_started[0]
+                    and hasattr(pipeline_progress,
+                                "finish_transfer")):
                 pipeline_progress.finish_transfer(label=str(filename or "download"))
         except Exception:
             pass
@@ -927,7 +986,11 @@ def _download_direct_file(
         if debug_logger is not None:
             debug_logger.write_record(
                 "exception",
-                {"phase": "direct-file", "url": url, "error": str(exc)},
+                {
+                    "phase": "direct-file",
+                    "url": url,
+                    "error": str(exc)
+                },
             )
         raise DownloadError(f"Failed to download {url}: {exc}") from exc
     except Exception as exc:
@@ -937,11 +1000,9 @@ def _download_direct_file(
         except Exception:
             pass
         try:
-            if (
-                pipeline_progress is not None
-                and transfer_started[0]
-                and hasattr(pipeline_progress, "finish_transfer")
-            ):
+            if (pipeline_progress is not None and transfer_started[0]
+                    and hasattr(pipeline_progress,
+                                "finish_transfer")):
                 pipeline_progress.finish_transfer(label=str(filename or "download"))
         except Exception:
             pass
@@ -959,9 +1020,10 @@ def _download_direct_file(
         raise DownloadError(f"Error downloading file: {exc}") from exc
 
 
-def probe_url(
-    url: str, no_playlist: bool = False, timeout_seconds: int = 15
-) -> Optional[Dict[str, Any]]:
+def probe_url(url: str,
+              no_playlist: bool = False,
+              timeout_seconds: int = 15) -> Optional[Dict[str,
+                                                          Any]]:
     """Probe URL to extract metadata WITHOUT downloading.
 
     Args:
@@ -1032,7 +1094,9 @@ def probe_url(
     if thread.is_alive():
         # Probe timed out - return None to fall back to direct download
-        debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
+        debug(
+            f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download"
+        )
         return None
 
     if result_container[1] is not None:
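
The "format tracked Python files" half of the commit was presumably produced by running YAPF in place over every git-tracked .py file, for example yapf -i $(git ls-files '*.py'). A minimal Python sketch of the same operation using yapf's FormatFile API follows; the file selection and style discovery are assumptions, not recorded in the commit:

import subprocess

from yapf.yapflib.yapf_api import FormatFile

# Ask git for every tracked Python file, one path per line.
tracked = subprocess.run(
    ["git", "ls-files", "*.py"],
    capture_output=True,
    text=True,
    check=True,
).stdout.splitlines()

for path in tracked:
    # in_place=True rewrites the file on disk; FormatFile picks up the
    # [style] section from a .style.yapf at the repository root.
    FormatFile(path, in_place=True)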