Add YAPF style + ignore, and format tracked Python files
This commit is contained in:
228
SYS/download.py
228
SYS/download.py
@@ -99,8 +99,11 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
|
||||
|
||||
def list_formats(
|
||||
url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
url: str,
|
||||
no_playlist: bool = False,
|
||||
playlist_items: Optional[str] = None
|
||||
) -> Optional[List[Dict[str,
|
||||
Any]]]:
|
||||
"""Get list of available formats for a URL using yt-dlp."""
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
@@ -130,15 +133,21 @@ def list_formats(
|
||||
for fmt in formats:
|
||||
result_formats.append(
|
||||
{
|
||||
"format_id": fmt.get("format_id", ""),
|
||||
"format": fmt.get("format", ""),
|
||||
"ext": fmt.get("ext", ""),
|
||||
"resolution": fmt.get("resolution", ""),
|
||||
"format_id": fmt.get("format_id",
|
||||
""),
|
||||
"format": fmt.get("format",
|
||||
""),
|
||||
"ext": fmt.get("ext",
|
||||
""),
|
||||
"resolution": fmt.get("resolution",
|
||||
""),
|
||||
"width": fmt.get("width"),
|
||||
"height": fmt.get("height"),
|
||||
"fps": fmt.get("fps"),
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"vcodec": fmt.get("vcodec",
|
||||
"none"),
|
||||
"acodec": fmt.get("acodec",
|
||||
"none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"tbr": fmt.get("tbr"),
|
||||
}
|
||||
@@ -153,8 +162,14 @@ def list_formats(
|
||||
|
||||
|
||||
def _download_with_sections_via_cli(
|
||||
url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
|
||||
) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
url: str,
|
||||
ytdl_options: Dict[str,
|
||||
Any],
|
||||
sections: List[str],
|
||||
quiet: bool = False
|
||||
) -> tuple[Optional[str],
|
||||
Dict[str,
|
||||
Any]]:
|
||||
"""Download each section separately so merge-file can combine them.
|
||||
|
||||
yt-dlp with multiple --download-sections args merges them into one file.
|
||||
@@ -174,7 +189,8 @@ def _download_with_sections_via_cli(
|
||||
# Generate a unique hash-based ID for this download session
|
||||
# This ensures different videos/downloads don't have filename collisions
|
||||
session_id = hashlib.md5(
|
||||
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
|
||||
(url + str(time.time()) + "".join(random.choices(string.ascii_letters,
|
||||
k=10))).encode()
|
||||
).hexdigest()[:12]
|
||||
|
||||
first_section_info = None
|
||||
@@ -207,7 +223,11 @@ def _download_with_sections_via_cli(
|
||||
metadata_cmd.append(url)
|
||||
|
||||
try:
|
||||
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
|
||||
meta_result = subprocess.run(
|
||||
metadata_cmd,
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
if meta_result.returncode == 0 and meta_result.stdout:
|
||||
try:
|
||||
info_dict = json.loads(meta_result.stdout.strip())
|
||||
@@ -253,7 +273,9 @@ def _download_with_sections_via_cli(
|
||||
cmd.append(url)
|
||||
|
||||
if not quiet:
|
||||
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
|
||||
debug(
|
||||
f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}"
|
||||
)
|
||||
debug(f"Command: {' '.join(cmd)}")
|
||||
|
||||
# Run the subprocess - don't capture output so progress is shown
|
||||
@@ -280,24 +302,26 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
# When downloading sections, each section will have .section_N_of_M added by _download_with_sections_via_cli
|
||||
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
|
||||
|
||||
base_options: Dict[str, Any] = {
|
||||
"outtmpl": outtmpl,
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 30,
|
||||
"retries": 10,
|
||||
"fragment_retries": 10,
|
||||
"http_chunk_size": 10_485_760,
|
||||
"restrictfilenames": True,
|
||||
"progress_hooks": [] if opts.quiet else [_progress_callback],
|
||||
}
|
||||
base_options: Dict[str,
|
||||
Any] = {
|
||||
"outtmpl": outtmpl,
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 30,
|
||||
"retries": 10,
|
||||
"fragment_retries": 10,
|
||||
"http_chunk_size": 10_485_760,
|
||||
"restrictfilenames": True,
|
||||
"progress_hooks": [] if opts.quiet else [_progress_callback],
|
||||
}
|
||||
|
||||
if opts.cookies_path and opts.cookies_path.is_file():
|
||||
base_options["cookiefile"] = str(opts.cookies_path)
|
||||
else:
|
||||
# Fallback to browser cookies
|
||||
base_options["cookiesfrombrowser"] = ("chrome",)
|
||||
base_options["cookiesfrombrowser"] = ("chrome",
|
||||
)
|
||||
|
||||
# Add no-playlist option if specified (for single video from playlist url)
|
||||
if opts.no_playlist:
|
||||
@@ -306,7 +330,9 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
# Configure based on mode
|
||||
if opts.mode == "audio":
|
||||
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
|
||||
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
|
||||
base_options["postprocessors"] = [{
|
||||
"key": "FFmpegExtractAudio"
|
||||
}]
|
||||
else: # video
|
||||
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
|
||||
base_options["format_sort"] = [
|
||||
@@ -396,7 +422,11 @@ def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
|
||||
yield output_dir / entry["filename"]
|
||||
|
||||
|
||||
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
|
||||
def _resolve_entry_and_path(info: Dict[str,
|
||||
Any],
|
||||
output_dir: Path) -> tuple[Dict[str,
|
||||
Any],
|
||||
Path]:
|
||||
"""Find downloaded file in yt-dlp metadata."""
|
||||
for entry in _iter_download_entries(info):
|
||||
for candidate in _candidate_paths(entry, output_dir):
|
||||
@@ -454,7 +484,10 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
# LibGen redirects to actual mirrors, follow redirects to get final URL
|
||||
session = requests.Session()
|
||||
session.headers.update(
|
||||
{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
|
||||
{
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
}
|
||||
)
|
||||
|
||||
debug(f"Following LibGen redirect chain for: {libgen_url}")
|
||||
@@ -479,30 +512,36 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
continue
|
||||
|
||||
href_lower = href.lower()
|
||||
if "get.php" in href_lower or href_lower.endswith(
|
||||
(".pdf", ".epub", ".djvu", ".mobi")
|
||||
):
|
||||
if "get.php" in href_lower or href_lower.endswith((".pdf",
|
||||
".epub",
|
||||
".djvu",
|
||||
".mobi")):
|
||||
download_url = (
|
||||
href if href.startswith("http") else urljoin(final_url, href)
|
||||
href if href.startswith("http") else
|
||||
urljoin(final_url,
|
||||
href)
|
||||
)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
else:
|
||||
# Regex fallback
|
||||
for m in re.finditer(
|
||||
r"href=[\"\']([^\"\']+)[\"\']",
|
||||
response.text or "",
|
||||
flags=re.IGNORECASE,
|
||||
r"href=[\"\']([^\"\']+)[\"\']",
|
||||
response.text or "",
|
||||
flags=re.IGNORECASE,
|
||||
):
|
||||
href = str(m.group(1) or "").strip()
|
||||
if not href or href.lower().startswith("javascript:"):
|
||||
continue
|
||||
href_lower = href.lower()
|
||||
if "get.php" in href_lower or href_lower.endswith(
|
||||
(".pdf", ".epub", ".djvu", ".mobi")
|
||||
):
|
||||
if "get.php" in href_lower or href_lower.endswith((".pdf",
|
||||
".epub",
|
||||
".djvu",
|
||||
".mobi")):
|
||||
download_url = (
|
||||
href if href.startswith("http") else urljoin(final_url, href)
|
||||
href if href.startswith("http") else
|
||||
urljoin(final_url,
|
||||
href)
|
||||
)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
@@ -616,13 +655,17 @@ def _download_direct_file(
|
||||
response = client._request("HEAD", url, follow_redirects=True)
|
||||
content_disposition = response.headers.get("content-disposition", "")
|
||||
try:
|
||||
content_type = str(response.headers.get("content-type", "") or "").strip().lower()
|
||||
content_type = str(response.headers.get("content-type",
|
||||
"") or "").strip().lower()
|
||||
except Exception:
|
||||
content_type = ""
|
||||
if content_disposition:
|
||||
# Extract filename from Content-Disposition header
|
||||
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
|
||||
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
|
||||
match = re.search(
|
||||
r'filename\*?=(?:"([^"]*)"|([^;\s]*))',
|
||||
content_disposition
|
||||
)
|
||||
if match:
|
||||
extracted_name = match.group(1) or match.group(2)
|
||||
if extracted_name:
|
||||
@@ -638,7 +681,11 @@ def _download_direct_file(
|
||||
# servers block/lie on HEAD, and a URL path like `edition.php` would otherwise
|
||||
# be saved as a bogus file.
|
||||
try:
|
||||
page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
|
||||
page_like_exts = {".php",
|
||||
".asp",
|
||||
".aspx",
|
||||
".jsp",
|
||||
".cgi"}
|
||||
ext = ""
|
||||
try:
|
||||
ext = Path(str(filename or "")).suffix.lower()
|
||||
@@ -653,13 +700,14 @@ def _download_direct_file(
|
||||
with client._request_stream("GET", url, follow_redirects=True) as resp:
|
||||
resp.raise_for_status()
|
||||
ct = (
|
||||
str(resp.headers.get("content-type", "") or "")
|
||||
.split(";", 1)[0]
|
||||
.strip()
|
||||
.lower()
|
||||
str(resp.headers.get("content-type",
|
||||
"") or "").split(";",
|
||||
1)[0].strip().lower()
|
||||
)
|
||||
if ct.startswith("text/html"):
|
||||
raise DownloadError("URL appears to be an HTML page, not a direct file")
|
||||
raise DownloadError(
|
||||
"URL appears to be an HTML page, not a direct file"
|
||||
)
|
||||
except DownloadError:
|
||||
raise
|
||||
except Exception:
|
||||
@@ -722,7 +770,8 @@ def _download_direct_file(
|
||||
# Prefer pipeline transfer bars when a Live UI is active.
|
||||
use_pipeline_transfer = False
|
||||
try:
|
||||
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
|
||||
if pipeline_progress is not None and hasattr(pipeline_progress,
|
||||
"update_transfer"):
|
||||
ui = None
|
||||
if hasattr(pipeline_progress, "ui_and_pipe_index"):
|
||||
ui, _ = pipeline_progress.ui_and_pipe_index() # type: ignore[attr-defined]
|
||||
@@ -753,15 +802,16 @@ def _download_direct_file(
|
||||
try:
|
||||
total_val: Optional[int] = (
|
||||
int(content_length)
|
||||
if isinstance(content_length, int) and content_length > 0
|
||||
else None
|
||||
if isinstance(content_length,
|
||||
int) and content_length > 0 else None
|
||||
)
|
||||
except Exception:
|
||||
total_val = None
|
||||
try:
|
||||
if hasattr(pipeline_progress, "begin_transfer"):
|
||||
pipeline_progress.begin_transfer(
|
||||
label=str(filename or "download"), total=total_val
|
||||
label=str(filename or "download"),
|
||||
total=total_val
|
||||
)
|
||||
transfer_started[0] = True
|
||||
except Exception:
|
||||
@@ -773,16 +823,18 @@ def _download_direct_file(
|
||||
|
||||
# Update pipeline transfer bar when present.
|
||||
try:
|
||||
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
|
||||
if pipeline_progress is not None and hasattr(pipeline_progress,
|
||||
"update_transfer"):
|
||||
_maybe_begin_transfer(content_length)
|
||||
total_val: Optional[int] = (
|
||||
int(content_length)
|
||||
if isinstance(content_length, int) and content_length > 0
|
||||
else None
|
||||
if isinstance(content_length,
|
||||
int) and content_length > 0 else None
|
||||
)
|
||||
pipeline_progress.update_transfer(
|
||||
label=str(filename or "download"),
|
||||
completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
|
||||
completed=int(bytes_downloaded)
|
||||
if bytes_downloaded is not None else None,
|
||||
total=total_val,
|
||||
)
|
||||
except Exception:
|
||||
@@ -796,12 +848,17 @@ def _download_direct_file(
|
||||
return
|
||||
|
||||
elapsed = now - start_time
|
||||
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
|
||||
percent = (
|
||||
bytes_downloaded / content_length
|
||||
) * 100 if content_length > 0 else 0
|
||||
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
|
||||
eta_str: Optional[str] = None
|
||||
if content_length > 0 and speed > 0:
|
||||
try:
|
||||
eta_seconds = max(0.0, float(content_length - bytes_downloaded) / float(speed))
|
||||
eta_seconds = max(
|
||||
0.0,
|
||||
float(content_length - bytes_downloaded) / float(speed)
|
||||
)
|
||||
minutes, seconds = divmod(int(eta_seconds), 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||
@@ -832,11 +889,9 @@ def _download_direct_file(
|
||||
pass
|
||||
|
||||
try:
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
if (pipeline_progress is not None and transfer_started[0]
|
||||
and hasattr(pipeline_progress,
|
||||
"finish_transfer")):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -844,8 +899,9 @@ def _download_direct_file(
|
||||
try:
|
||||
if progress_bar is not None:
|
||||
avg_speed_str = (
|
||||
progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
|
||||
+ "/s"
|
||||
progress_bar.
|
||||
format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) +
|
||||
"/s"
|
||||
)
|
||||
else:
|
||||
avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
|
||||
@@ -864,7 +920,8 @@ def _download_direct_file(
|
||||
ext = ""
|
||||
|
||||
info = {
|
||||
"id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
|
||||
"id": str(filename).rsplit(".",
|
||||
1)[0] if "." in str(filename) else str(filename),
|
||||
"ext": ext,
|
||||
"webpage_url": url,
|
||||
}
|
||||
@@ -897,7 +954,11 @@ def _download_direct_file(
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"direct-file-downloaded",
|
||||
{"url": url, "path": str(file_path), "hash": hash_value},
|
||||
{
|
||||
"url": url,
|
||||
"path": str(file_path),
|
||||
"hash": hash_value
|
||||
},
|
||||
)
|
||||
|
||||
return DownloadMediaResult(
|
||||
@@ -915,11 +976,9 @@ def _download_direct_file(
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
if (pipeline_progress is not None and transfer_started[0]
|
||||
and hasattr(pipeline_progress,
|
||||
"finish_transfer")):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -927,7 +986,11 @@ def _download_direct_file(
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "direct-file", "url": url, "error": str(exc)},
|
||||
{
|
||||
"phase": "direct-file",
|
||||
"url": url,
|
||||
"error": str(exc)
|
||||
},
|
||||
)
|
||||
raise DownloadError(f"Failed to download {url}: {exc}") from exc
|
||||
except Exception as exc:
|
||||
@@ -937,11 +1000,9 @@ def _download_direct_file(
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
if (pipeline_progress is not None and transfer_started[0]
|
||||
and hasattr(pipeline_progress,
|
||||
"finish_transfer")):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -959,9 +1020,10 @@ def _download_direct_file(
|
||||
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
def probe_url(
|
||||
url: str, no_playlist: bool = False, timeout_seconds: int = 15
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
def probe_url(url: str,
|
||||
no_playlist: bool = False,
|
||||
timeout_seconds: int = 15) -> Optional[Dict[str,
|
||||
Any]]:
|
||||
"""Probe URL to extract metadata WITHOUT downloading.
|
||||
|
||||
Args:
|
||||
@@ -1032,7 +1094,9 @@ def probe_url(
|
||||
|
||||
if thread.is_alive():
|
||||
# Probe timed out - return None to fall back to direct download
|
||||
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
|
||||
debug(
|
||||
f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download"
|
||||
)
|
||||
return None
|
||||
|
||||
if result_container[1] is not None:
|
||||
|
||||
Reference in New Issue
Block a user