From 3c1910922292876e635f7a7ccf8407909b0e6c3c Mon Sep 17 00:00:00 2001 From: Nose Date: Wed, 21 Jan 2026 14:06:18 -0800 Subject: [PATCH] Dedupe URL existence checks; cache yt-dlp probes; pass cookiesfrombrowser as a list --- cmdlet/download_file.py | 24 ++++++++++++++++++++---- tool/ytdlp.py | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py index 23409ef..ff38a84 100644 --- a/cmdlet/download_file.py +++ b/cmdlet/download_file.py @@ -1126,12 +1126,20 @@ class Download_File(Cmdlet): ) -> bool: to_check = [] if candidate_url: - to_check.append(candidate_url) + to_check.append(str(candidate_url)) if extra_urls: - to_check.extend(extra_urls) + to_check.extend([str(u) for u in extra_urls if u]) + + # De-duplicate needles to avoid redundant DB searches. + seen = set() + unique_to_check = [] + for u in to_check: + if u not in seen: + unique_to_check.append(u) + seen.add(u) return sh.check_url_exists_in_storage( - urls=to_check, + urls=unique_to_check, storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir @@ -1146,8 +1154,16 @@ class Download_File(Cmdlet): final_output_dir: Path, **kwargs: Any, ) -> bool: + if not urls: + return True + unique_urls = [] + seen = set() + for u in urls: + if u and u not in seen: + unique_urls.append(u) + seen.add(u) return sh.check_url_exists_in_storage( - urls=urls, + urls=unique_urls, storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir diff --git a/tool/ytdlp.py b/tool/ytdlp.py index 5ac5605..3da8d47 100644 --- a/tool/ytdlp.py +++ b/tool/ytdlp.py @@ -226,6 +226,8 @@ def is_url_supported_by_ytdlp(url: str) -> bool: return False +_FORMATS_CACHE: Dict[str, tuple[float, List[Dict[str, Any]]]] = {} + def list_formats( url: str, *, @@ -242,6 +244,14 @@ def list_formats( if not is_url_supported_by_ytdlp(url): return None + # Cache format probes to avoid redundant network hits + cache_key = 
hashlib.md5(f"{url}|{no_playlist}|{playlist_items}|{cookiefile}".encode()).hexdigest() + now = time.monotonic() + if cache_key in _FORMATS_CACHE: + ts, result = _FORMATS_CACHE[cache_key] + if now - ts < 300: # 5 minute cache for formats + return result + result_container: List[Optional[Any]] = [None, None] # [result, error] def _do_list() -> None: @@ -262,7 +272,7 @@ def list_formats( ydl_opts["cookiefile"] = str(cookiefile) else: # Best effort attempt to use browser cookies if no file is explicitly passed - ydl_opts["cookiesfrombrowser"] = "chrome" + ydl_opts["cookiesfrombrowser"] = ["chrome"] if no_playlist: ydl_opts["noplaylist"] = True @@ -302,9 +312,14 @@ def list_formats( if result_container[1] is not None: return None + if result_container[0] is not None: + _FORMATS_CACHE[cache_key] = (now, cast(List[Dict[str, Any]], result_container[0])) + return cast(Optional[List[Dict[str, Any]]], result_container[0]) +_PROBE_CACHE: Dict[str, tuple[float, Dict[str, Any]]] = {} + def probe_url( url: str, no_playlist: bool = False, @@ -320,6 +335,14 @@ def probe_url( if not is_url_supported_by_ytdlp(url): return None + # Simple in-memory cache to avoid duplicate probes for the same URL/options in a short window. 
+ cache_key = hashlib.md5(f"{url}|{no_playlist}|{cookiefile}".encode()).hexdigest() + now = time.monotonic() + if cache_key in _PROBE_CACHE: + ts, result = _PROBE_CACHE[cache_key] + if now - ts < 60: # 60 second cache + return result + result_container: List[Optional[Any]] = [None, None] # [result, error] def _do_probe() -> None: @@ -340,6 +363,10 @@ def probe_url( if cookiefile: ydl_opts["cookiefile"] = str(cookiefile) + else: + # Best effort fallback + ydl_opts["cookiesfrombrowser"] = ["chrome"] + if no_playlist: ydl_opts["noplaylist"] = True @@ -381,6 +408,9 @@ def probe_url( if result_container[1] is not None: return None + if result_container[0] is not None: + _PROBE_CACHE[cache_key] = (now, cast(Dict[str, Any], result_container[0])) + return cast(Optional[Dict[str, Any]], result_container[0]) @@ -678,7 +708,7 @@ class YtDlpTool: # Add browser cookies support "just in case" if no file found (best effort) # This uses yt-dlp's support for extracting from common browsers. # Defaulting to 'chrome' as the most common path. - base_options["cookiesfrombrowser"] = "chrome" + base_options["cookiesfrombrowser"] = ["chrome"] # Special handling for format keywords if opts.ytdl_format == "audio":