d
This commit is contained in:
@@ -234,7 +234,7 @@
|
|||||||
"ddl\\.to/([0-9a-zA-Z]{12})"
|
"ddl\\.to/([0-9a-zA-Z]{12})"
|
||||||
],
|
],
|
||||||
"regexp": "((ddownload\\.com/[0-9a-zA-Z]{12}))|(ddl\\.to/([0-9a-zA-Z]{12}))",
|
"regexp": "((ddownload\\.com/[0-9a-zA-Z]{12}))|(ddl\\.to/([0-9a-zA-Z]{12}))",
|
||||||
"status": true
|
"status": false
|
||||||
},
|
},
|
||||||
"dropapk": {
|
"dropapk": {
|
||||||
"name": "dropapk",
|
"name": "dropapk",
|
||||||
|
|||||||
@@ -2508,34 +2508,25 @@ class API_folder_store:
|
|||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
payload = text if text.endswith("\n") else f"{text}\n"
|
||||||
cursor = self.connection.cursor()
|
cursor = self.connection.cursor()
|
||||||
cursor.execute(
|
|
||||||
"SELECT stdout FROM worker WHERE worker_id = ?",
|
|
||||||
(worker_id,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
row = cursor.fetchone()
|
|
||||||
|
|
||||||
if not row:
|
|
||||||
logger.warning(f"Worker {worker_id} not found for stdout append")
|
|
||||||
return False
|
|
||||||
|
|
||||||
current_stdout = row[0] or ""
|
|
||||||
separator = (
|
|
||||||
"" if not current_stdout else
|
|
||||||
("" if current_stdout.endswith("\n") else "\n")
|
|
||||||
)
|
|
||||||
new_stdout = f"{current_stdout}{separator}{text}\n"
|
|
||||||
|
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
UPDATE worker SET stdout = ?, last_updated = CURRENT_TIMESTAMP,
|
UPDATE worker
|
||||||
|
SET stdout = CASE
|
||||||
|
WHEN stdout IS NULL OR stdout = '' THEN ?
|
||||||
|
WHEN substr(stdout, -1, 1) = '\n' THEN stdout || ?
|
||||||
|
ELSE stdout || '\n' || ?
|
||||||
|
END,
|
||||||
|
last_updated = CURRENT_TIMESTAMP,
|
||||||
last_stdout_at = CURRENT_TIMESTAMP
|
last_stdout_at = CURRENT_TIMESTAMP
|
||||||
WHERE worker_id = ?
|
WHERE worker_id = ?
|
||||||
""",
|
""",
|
||||||
(new_stdout,
|
(payload, payload, payload, worker_id),
|
||||||
worker_id),
|
|
||||||
)
|
)
|
||||||
|
if cursor.rowcount <= 0:
|
||||||
|
logger.warning(f"Worker {worker_id} not found for stdout append")
|
||||||
|
return False
|
||||||
self._insert_worker_log_entry(
|
self._insert_worker_log_entry(
|
||||||
cursor,
|
cursor,
|
||||||
worker_id,
|
worker_id,
|
||||||
|
|||||||
50
CLI.py
50
CLI.py
@@ -2856,6 +2856,56 @@ class PipelineExecutor:
|
|||||||
except Exception:
|
except Exception:
|
||||||
auto_stage = None
|
auto_stage = None
|
||||||
|
|
||||||
|
source_cmd_for_selection = None
|
||||||
|
source_args_for_selection: List[str] = []
|
||||||
|
try:
|
||||||
|
source_cmd_for_selection = (
|
||||||
|
ctx.get_current_stage_table_source_command()
|
||||||
|
or ctx.get_last_result_table_source_command()
|
||||||
|
)
|
||||||
|
source_args_for_selection = (
|
||||||
|
ctx.get_current_stage_table_source_args()
|
||||||
|
or ctx.get_last_result_table_source_args()
|
||||||
|
or []
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
source_cmd_for_selection = None
|
||||||
|
source_args_for_selection = []
|
||||||
|
|
||||||
|
if not stages and selection_indices and source_cmd_for_selection:
|
||||||
|
src_norm = _norm_cmd(source_cmd_for_selection)
|
||||||
|
if src_norm in {".worker", "worker", "workers"}:
|
||||||
|
if len(selection_indices) == 1:
|
||||||
|
idx = selection_indices[0]
|
||||||
|
row_args = None
|
||||||
|
try:
|
||||||
|
row_args = ctx.get_current_stage_table_row_selection_args(idx)
|
||||||
|
except Exception:
|
||||||
|
row_args = None
|
||||||
|
if not row_args:
|
||||||
|
try:
|
||||||
|
row_args = ctx.get_last_result_table_row_selection_args(idx)
|
||||||
|
except Exception:
|
||||||
|
row_args = None
|
||||||
|
if not row_args:
|
||||||
|
try:
|
||||||
|
items = ctx.get_last_result_items() or []
|
||||||
|
if 0 <= idx < len(items):
|
||||||
|
maybe = items[idx]
|
||||||
|
if isinstance(maybe, dict):
|
||||||
|
candidate = maybe.get("_selection_args")
|
||||||
|
if isinstance(candidate, (list, tuple)):
|
||||||
|
row_args = [str(x) for x in candidate if x is not None]
|
||||||
|
except Exception:
|
||||||
|
row_args = row_args or None
|
||||||
|
|
||||||
|
if row_args:
|
||||||
|
stages.append(
|
||||||
|
[str(source_cmd_for_selection)]
|
||||||
|
+ [str(x) for x in row_args if x is not None]
|
||||||
|
+ [str(x) for x in source_args_for_selection if x is not None]
|
||||||
|
)
|
||||||
|
|
||||||
def _apply_row_action_to_stage(stage_idx: int) -> bool:
|
def _apply_row_action_to_stage(stage_idx: int) -> bool:
|
||||||
if not selection_indices or len(selection_indices) != 1:
|
if not selection_indices or len(selection_indices) != 1:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ persistence to database and optional auto-refresh callbacks.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, List, Callable
|
from typing import Optional, Dict, Any, List, Callable, Tuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from threading import Thread, Lock
|
from threading import Thread, Lock
|
||||||
import time
|
import time
|
||||||
@@ -270,6 +270,13 @@ class WorkerManager:
|
|||||||
WorkerLoggingHandler] = {} # Track active handlers
|
WorkerLoggingHandler] = {} # Track active handlers
|
||||||
self._worker_last_step: Dict[str,
|
self._worker_last_step: Dict[str,
|
||||||
str] = {}
|
str] = {}
|
||||||
|
# Buffered stdout/log batching to reduce DB lock contention.
|
||||||
|
self._stdout_buffers: Dict[Tuple[str, str], List[str]] = {}
|
||||||
|
self._stdout_buffer_sizes: Dict[Tuple[str, str], int] = {}
|
||||||
|
self._stdout_buffer_steps: Dict[Tuple[str, str], Optional[str]] = {}
|
||||||
|
self._stdout_last_flush: Dict[Tuple[str, str], float] = {}
|
||||||
|
self._stdout_flush_bytes = 4096
|
||||||
|
self._stdout_flush_interval = 0.75
|
||||||
|
|
||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
"""Close the database connection."""
|
"""Close the database connection."""
|
||||||
@@ -392,6 +399,12 @@ class WorkerManager:
|
|||||||
root_logger = logging.getLogger()
|
root_logger = logging.getLogger()
|
||||||
root_logger.removeHandler(handler)
|
root_logger.removeHandler(handler)
|
||||||
|
|
||||||
|
# Flush any buffered stdout/log data for this worker
|
||||||
|
try:
|
||||||
|
self.flush_worker_stdout(worker_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"[WorkerManager] Disabled logging for worker: {worker_id}"
|
f"[WorkerManager] Disabled logging for worker: {worker_id}"
|
||||||
)
|
)
|
||||||
@@ -508,6 +521,10 @@ class WorkerManager:
|
|||||||
True if update was successful
|
True if update was successful
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
try:
|
||||||
|
self.flush_worker_stdout(worker_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"status": result,
|
"status": result,
|
||||||
"completed_at": datetime.now().isoformat()
|
"completed_at": datetime.now().isoformat()
|
||||||
@@ -742,17 +759,119 @@ class WorkerManager:
|
|||||||
Returns:
|
Returns:
|
||||||
True if append was successful
|
True if append was successful
|
||||||
"""
|
"""
|
||||||
try:
|
if not text:
|
||||||
|
return True
|
||||||
|
|
||||||
|
now = time.monotonic()
|
||||||
step_label = self._get_last_step(worker_id)
|
step_label = self._get_last_step(worker_id)
|
||||||
|
key = (worker_id, channel)
|
||||||
|
pending_flush: List[Tuple[str, str, Optional[str], str]] = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
with self._lock:
|
||||||
|
# Initialize last flush time for this buffer
|
||||||
|
if key not in self._stdout_last_flush:
|
||||||
|
self._stdout_last_flush[key] = now
|
||||||
|
|
||||||
|
current_step = self._stdout_buffer_steps.get(key)
|
||||||
|
if current_step is None:
|
||||||
|
self._stdout_buffer_steps[key] = step_label
|
||||||
|
current_step = step_label
|
||||||
|
|
||||||
|
# If step changes, flush existing buffer to keep step tags coherent
|
||||||
|
if current_step != step_label:
|
||||||
|
buffered = "".join(self._stdout_buffers.get(key, []))
|
||||||
|
if buffered:
|
||||||
|
pending_flush.append((worker_id, channel, current_step, buffered))
|
||||||
|
self._stdout_buffers[key] = []
|
||||||
|
self._stdout_buffer_sizes[key] = 0
|
||||||
|
self._stdout_last_flush[key] = now
|
||||||
|
self._stdout_buffer_steps[key] = step_label
|
||||||
|
|
||||||
|
buf = self._stdout_buffers.setdefault(key, [])
|
||||||
|
buf.append(text)
|
||||||
|
size = self._stdout_buffer_sizes.get(key, 0) + len(text)
|
||||||
|
self._stdout_buffer_sizes[key] = size
|
||||||
|
|
||||||
|
last_flush = self._stdout_last_flush.get(key, now)
|
||||||
|
should_flush = (
|
||||||
|
size >= self._stdout_flush_bytes
|
||||||
|
or (now - last_flush) >= self._stdout_flush_interval
|
||||||
|
)
|
||||||
|
if should_flush:
|
||||||
|
buffered = "".join(self._stdout_buffers.get(key, []))
|
||||||
|
if buffered:
|
||||||
|
pending_flush.append(
|
||||||
|
(worker_id, channel, self._stdout_buffer_steps.get(key), buffered)
|
||||||
|
)
|
||||||
|
self._stdout_buffers[key] = []
|
||||||
|
self._stdout_buffer_sizes[key] = 0
|
||||||
|
self._stdout_last_flush[key] = now
|
||||||
|
self._stdout_buffer_steps[key] = None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[WorkerManager] Error buffering stdout: {e}", exc_info=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
ok = True
|
||||||
|
for wid, ch, step, payload in pending_flush:
|
||||||
|
try:
|
||||||
|
with self._db_lock:
|
||||||
|
result = self.db.append_worker_stdout(
|
||||||
|
wid,
|
||||||
|
payload,
|
||||||
|
step=step,
|
||||||
|
channel=ch
|
||||||
|
)
|
||||||
|
ok = ok and result
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"[WorkerManager] Error flushing stdout for {wid}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
ok = False
|
||||||
|
return ok
|
||||||
|
|
||||||
|
def flush_worker_stdout(self, worker_id: str) -> bool:
|
||||||
|
"""Flush any buffered stdout/log data for a worker."""
|
||||||
|
keys_to_flush: List[Tuple[str, str]] = []
|
||||||
|
with self._lock:
|
||||||
|
for key in list(self._stdout_buffers.keys()):
|
||||||
|
if key[0] == worker_id:
|
||||||
|
keys_to_flush.append(key)
|
||||||
|
|
||||||
|
ok = True
|
||||||
|
for wid, channel in keys_to_flush:
|
||||||
|
ok = self._flush_stdout_buffer(wid, channel) and ok
|
||||||
|
return ok
|
||||||
|
|
||||||
|
def _flush_stdout_buffer(self, worker_id: str, channel: str) -> bool:
|
||||||
|
key = (worker_id, channel)
|
||||||
|
with self._lock:
|
||||||
|
chunks = self._stdout_buffers.get(key)
|
||||||
|
if not chunks:
|
||||||
|
return True
|
||||||
|
text = "".join(chunks)
|
||||||
|
step = self._stdout_buffer_steps.get(key)
|
||||||
|
self._stdout_buffers[key] = []
|
||||||
|
self._stdout_buffer_sizes[key] = 0
|
||||||
|
self._stdout_last_flush[key] = time.monotonic()
|
||||||
|
self._stdout_buffer_steps[key] = None
|
||||||
|
|
||||||
|
if not text:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
with self._db_lock:
|
with self._db_lock:
|
||||||
return self.db.append_worker_stdout(
|
return self.db.append_worker_stdout(
|
||||||
worker_id,
|
worker_id,
|
||||||
text,
|
text,
|
||||||
step=step_label,
|
step=step,
|
||||||
channel=channel
|
channel=channel,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
logger.error(
|
||||||
|
f"[WorkerManager] Error flushing stdout for {worker_id}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_stdout(self, worker_id: str) -> str:
|
def get_stdout(self, worker_id: str) -> str:
|
||||||
@@ -799,6 +918,17 @@ class WorkerManager:
|
|||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
"""Close the worker manager and database connection."""
|
"""Close the worker manager and database connection."""
|
||||||
self.stop_auto_refresh()
|
self.stop_auto_refresh()
|
||||||
|
try:
|
||||||
|
self._flush_all_stdout_buffers()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
with self._db_lock:
|
with self._db_lock:
|
||||||
self.db.close()
|
self.db.close()
|
||||||
logger.info("[WorkerManager] Closed")
|
logger.info("[WorkerManager] Closed")
|
||||||
|
|
||||||
|
def _flush_all_stdout_buffers(self) -> None:
|
||||||
|
keys_to_flush: List[Tuple[str, str]] = []
|
||||||
|
with self._lock:
|
||||||
|
keys_to_flush = list(self._stdout_buffers.keys())
|
||||||
|
for wid, channel in keys_to_flush:
|
||||||
|
self._flush_stdout_buffer(wid, channel)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import re
|
|||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
from collections.abc import Iterable as IterableABC
|
from collections.abc import Iterable as IterableABC
|
||||||
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
||||||
|
|
||||||
@@ -2640,6 +2641,9 @@ def propagate_metadata(
|
|||||||
is_same_length = len(new_items) == len(prev_normalized)
|
is_same_length = len(new_items) == len(prev_normalized)
|
||||||
|
|
||||||
for i, item in enumerate(new_items):
|
for i, item in enumerate(new_items):
|
||||||
|
if isinstance(item, dict) and item.get("_skip_metadata_propagation"):
|
||||||
|
normalized.append(item)
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
obj = coerce_to_pipe_object(item)
|
obj = coerce_to_pipe_object(item)
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -3058,6 +3062,9 @@ def check_url_exists_in_storage(
|
|||||||
stage_ctx = None
|
stage_ctx = None
|
||||||
|
|
||||||
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
|
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
|
||||||
|
start_time = time.monotonic()
|
||||||
|
time_budget = 45.0
|
||||||
|
debug(f"[preflight] check_url_exists_in_storage: checking {len(urls)} url(s)")
|
||||||
if in_pipeline:
|
if in_pipeline:
|
||||||
try:
|
try:
|
||||||
already_checked = bool(
|
already_checked = bool(
|
||||||
@@ -3101,6 +3108,18 @@ def check_url_exists_in_storage(
|
|||||||
preflight_cache["url_duplicates"] = url_dup_cache
|
preflight_cache["url_duplicates"] = url_dup_cache
|
||||||
_store_preflight_cache(preflight_cache)
|
_store_preflight_cache(preflight_cache)
|
||||||
|
|
||||||
|
def _timed_out(reason: str) -> bool:
|
||||||
|
try:
|
||||||
|
if (time.monotonic() - start_time) >= time_budget:
|
||||||
|
debug(
|
||||||
|
f"Bulk URL preflight timed out after {time_budget:.0f}s ({reason}); continuing"
|
||||||
|
)
|
||||||
|
_mark_preflight_checked()
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
if in_pipeline:
|
if in_pipeline:
|
||||||
try:
|
try:
|
||||||
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
|
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
|
||||||
@@ -3358,7 +3377,10 @@ def check_url_exists_in_storage(
|
|||||||
_mark_preflight_checked()
|
_mark_preflight_checked()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
bulk_mode = len(unique_urls) >= 8
|
if _timed_out("before backend scan"):
|
||||||
|
return True
|
||||||
|
|
||||||
|
bulk_mode = len(unique_urls) > 1
|
||||||
|
|
||||||
def _build_bulk_patterns(needles_map: Dict[str, List[str]], max_per_url: int = 3, max_total: int = 240) -> List[str]:
|
def _build_bulk_patterns(needles_map: Dict[str, List[str]], max_per_url: int = 3, max_total: int = 240) -> List[str]:
|
||||||
patterns: List[str] = []
|
patterns: List[str] = []
|
||||||
@@ -3562,6 +3584,8 @@ def check_url_exists_in_storage(
|
|||||||
HydrusNetwork = None # type: ignore
|
HydrusNetwork = None # type: ignore
|
||||||
|
|
||||||
for backend_name in backend_names:
|
for backend_name in backend_names:
|
||||||
|
if _timed_out("backend scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
@@ -3569,6 +3593,8 @@ def check_url_exists_in_storage(
|
|||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
debug(f"[preflight] Scanning backend: {backend_name}")
|
||||||
|
|
||||||
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
|
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
|
||||||
client = getattr(backend, "_client", None)
|
client = getattr(backend, "_client", None)
|
||||||
if client is None:
|
if client is None:
|
||||||
@@ -3576,6 +3602,9 @@ def check_url_exists_in_storage(
|
|||||||
if not hydrus_available:
|
if not hydrus_available:
|
||||||
debug("Bulk URL preflight: hydrus availability check failed; attempting best-effort lookup")
|
debug("Bulk URL preflight: hydrus availability check failed; attempting best-effort lookup")
|
||||||
|
|
||||||
|
if _timed_out("hydrus scan"):
|
||||||
|
return True
|
||||||
|
|
||||||
if bulk_mode and bulk_patterns:
|
if bulk_mode and bulk_patterns:
|
||||||
bulk_hits: Optional[List[Any]] = None
|
bulk_hits: Optional[List[Any]] = None
|
||||||
bulk_limit = min(2000, max(200, len(unique_urls) * 8))
|
bulk_limit = min(2000, max(200, len(unique_urls) * 8))
|
||||||
@@ -3591,8 +3620,13 @@ def check_url_exists_in_storage(
|
|||||||
except Exception:
|
except Exception:
|
||||||
bulk_hits = None
|
bulk_hits = None
|
||||||
|
|
||||||
if bulk_hits is not None:
|
if bulk_hits is None:
|
||||||
|
debug("Bulk URL preflight: Hydrus bulk scan failed; skipping per-URL checks")
|
||||||
|
continue
|
||||||
|
|
||||||
for hit in bulk_hits:
|
for hit in bulk_hits:
|
||||||
|
if _timed_out("hydrus bulk scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
url_values = _extract_urls_from_hit(hit, backend, allow_backend_lookup=False)
|
url_values = _extract_urls_from_hit(hit, backend, allow_backend_lookup=False)
|
||||||
@@ -3600,6 +3634,8 @@ def check_url_exists_in_storage(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
for original_url, needles in url_needles.items():
|
||||||
|
if _timed_out("hydrus bulk scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
if (original_url, str(backend_name)) in seen_pairs:
|
if (original_url, str(backend_name)) in seen_pairs:
|
||||||
@@ -3625,6 +3661,8 @@ def check_url_exists_in_storage(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
for original_url, needles in url_needles.items():
|
||||||
|
if _timed_out("hydrus per-url scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
if (original_url, str(backend_name)) in seen_pairs:
|
if (original_url, str(backend_name)) in seen_pairs:
|
||||||
@@ -3705,6 +3743,8 @@ def check_url_exists_in_storage(
|
|||||||
|
|
||||||
if bulk_hits is not None:
|
if bulk_hits is not None:
|
||||||
for hit in bulk_hits:
|
for hit in bulk_hits:
|
||||||
|
if _timed_out("backend bulk scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
url_values = _extract_urls_from_hit(hit, backend, allow_backend_lookup=False)
|
url_values = _extract_urls_from_hit(hit, backend, allow_backend_lookup=False)
|
||||||
@@ -3712,6 +3752,8 @@ def check_url_exists_in_storage(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
for original_url, needles in url_needles.items():
|
||||||
|
if _timed_out("backend bulk scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
if (original_url, str(backend_name)) in seen_pairs:
|
if (original_url, str(backend_name)) in seen_pairs:
|
||||||
@@ -3737,6 +3779,8 @@ def check_url_exists_in_storage(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
for original_url, needles in url_needles.items():
|
||||||
|
if _timed_out("backend per-url scan"):
|
||||||
|
return True
|
||||||
if len(match_rows) >= max_rows:
|
if len(match_rows) >= max_rows:
|
||||||
break
|
break
|
||||||
if (original_url, str(backend_name)) in seen_pairs:
|
if (original_url, str(backend_name)) in seen_pairs:
|
||||||
|
|||||||
@@ -387,18 +387,23 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
total_items = len(expanded_items)
|
total_items = len(expanded_items)
|
||||||
processed_items = 0
|
processed_items = 0
|
||||||
|
debug(f"[download-file] Processing {total_items} piped item(s)...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if total_items:
|
if total_items:
|
||||||
progress.set_percent(0)
|
progress.set_percent(0)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for item in expanded_items:
|
for idx, item in enumerate(expanded_items, 1):
|
||||||
try:
|
try:
|
||||||
label = "item"
|
label = "item"
|
||||||
table = get_field(item, "table")
|
table = get_field(item, "table")
|
||||||
title = get_field(item, "title")
|
title = get_field(item, "title")
|
||||||
target = get_field(item, "path") or get_field(item, "url")
|
target = get_field(item, "path") or get_field(item, "url")
|
||||||
|
|
||||||
|
debug(f"[download-file] Item {idx}/{total_items}: {title or target or 'unnamed'}")
|
||||||
|
|
||||||
media_kind = get_field(item, "media_kind")
|
media_kind = get_field(item, "media_kind")
|
||||||
tags_val = get_field(item, "tag")
|
tags_val = get_field(item, "tag")
|
||||||
tags_list: Optional[List[str]]
|
tags_list: Optional[List[str]]
|
||||||
@@ -931,15 +936,26 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]:
|
def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]:
|
||||||
|
# Cache storage object in config to avoid excessive DB initialization in loops
|
||||||
|
if isinstance(config, dict) and "_storage_cache" in config:
|
||||||
|
cached = config["_storage_cache"]
|
||||||
|
if isinstance(cached, tuple) and len(cached) == 2:
|
||||||
|
return cached # type: ignore
|
||||||
|
|
||||||
storage = None
|
storage = None
|
||||||
hydrus_available = True
|
hydrus_available = True
|
||||||
try:
|
try:
|
||||||
from Store import Store
|
from Store import Store
|
||||||
from API.HydrusNetwork import is_hydrus_available
|
from API.HydrusNetwork import is_hydrus_available
|
||||||
|
|
||||||
|
debug(f"[download-file] Initializing storage interface...")
|
||||||
storage = Store(config=config or {}, suppress_debug=True)
|
storage = Store(config=config or {}, suppress_debug=True)
|
||||||
hydrus_available = bool(is_hydrus_available(config or {}))
|
hydrus_available = bool(is_hydrus_available(config or {}))
|
||||||
except Exception:
|
|
||||||
|
if isinstance(config, dict):
|
||||||
|
config["_storage_cache"] = (storage, hydrus_available)
|
||||||
|
except Exception as e:
|
||||||
|
debug(f"[download-file] Storage initialization error: {e}")
|
||||||
storage = None
|
storage = None
|
||||||
return storage, hydrus_available
|
return storage, hydrus_available
|
||||||
|
|
||||||
@@ -1052,6 +1068,7 @@ class Download_File(Cmdlet):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
|
def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
|
||||||
if playlist_items:
|
if playlist_items:
|
||||||
|
debug(f"[download-file] Skipping canonicalization for playlist item(s): {playlist_items}")
|
||||||
return str(requested_url)
|
return str(requested_url)
|
||||||
try:
|
try:
|
||||||
cf = None
|
cf = None
|
||||||
@@ -1061,14 +1078,19 @@ class Download_File(Cmdlet):
|
|||||||
cf = str(cookie_path)
|
cf = str(cookie_path)
|
||||||
except Exception:
|
except Exception:
|
||||||
cf = None
|
cf = None
|
||||||
|
|
||||||
|
debug(f"[download-file] Canonicalizing URL: {requested_url}")
|
||||||
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
||||||
if isinstance(pr, dict):
|
if isinstance(pr, dict):
|
||||||
for key in ("webpage_url", "original_url", "url", "requested_url"):
|
for key in ("webpage_url", "original_url", "url", "requested_url"):
|
||||||
value = pr.get(key)
|
value = pr.get(key)
|
||||||
if isinstance(value, str) and value.strip():
|
if isinstance(value, str) and value.strip():
|
||||||
return value.strip()
|
canon = value.strip()
|
||||||
except Exception:
|
if canon != requested_url:
|
||||||
pass
|
debug(f"[download-file] Resolved canonical URL: {requested_url} -> {canon}")
|
||||||
|
return canon
|
||||||
|
except Exception as e:
|
||||||
|
debug(f"[download-file] Canonicalization error for {requested_url}: {e}")
|
||||||
return str(requested_url)
|
return str(requested_url)
|
||||||
|
|
||||||
|
|
||||||
@@ -1113,6 +1135,10 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
|
|
||||||
def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
|
def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
|
||||||
|
ctx = pipeline_context.get_stage_context()
|
||||||
|
if ctx is not None and getattr(ctx, "total_stages", 0) > 1:
|
||||||
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cf = self._cookiefile_str(ytdlp_tool)
|
cf = self._cookiefile_str(ytdlp_tool)
|
||||||
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
||||||
@@ -1240,6 +1266,13 @@ class Download_File(Cmdlet):
|
|||||||
args: Sequence[str],
|
args: Sequence[str],
|
||||||
skip_preflight: bool = False,
|
skip_preflight: bool = False,
|
||||||
) -> Optional[int]:
|
) -> Optional[int]:
|
||||||
|
try:
|
||||||
|
ctx = pipeline_context.get_stage_context()
|
||||||
|
if ctx is not None and getattr(ctx, "total_stages", 0) > 1:
|
||||||
|
# In pipelines, skip interactive format tables; require explicit -query format.
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
if (
|
if (
|
||||||
mode != "audio"
|
mode != "audio"
|
||||||
and not clip_spec
|
and not clip_spec
|
||||||
@@ -1415,7 +1448,7 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
for url in supported_url:
|
for url in supported_url:
|
||||||
try:
|
try:
|
||||||
debug(f"Processing: {url}")
|
debug(f"[download-file] Processing URL in loop (1/3 stage 1): {url}")
|
||||||
|
|
||||||
canonical_url = self._canonicalize_url_for_storage(
|
canonical_url = self._canonicalize_url_for_storage(
|
||||||
requested_url=url,
|
requested_url=url,
|
||||||
@@ -1424,6 +1457,7 @@ class Download_File(Cmdlet):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if not skip_per_url_preflight:
|
if not skip_per_url_preflight:
|
||||||
|
debug(f"[download-file] Running duplicate preflight for: {canonical_url}")
|
||||||
if not self._preflight_url_duplicate(
|
if not self._preflight_url_duplicate(
|
||||||
storage=storage,
|
storage=storage,
|
||||||
hydrus_available=hydrus_available,
|
hydrus_available=hydrus_available,
|
||||||
@@ -1431,7 +1465,7 @@ class Download_File(Cmdlet):
|
|||||||
candidate_url=canonical_url,
|
candidate_url=canonical_url,
|
||||||
extra_urls=[url],
|
extra_urls=[url],
|
||||||
):
|
):
|
||||||
log(f"Skipping download: {url}", file=sys.stderr)
|
log(f"Skipping download (duplicate found): {url}", file=sys.stderr)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
PipelineProgress(pipeline_context).begin_steps(2)
|
PipelineProgress(pipeline_context).begin_steps(2)
|
||||||
@@ -1510,9 +1544,9 @@ class Download_File(Cmdlet):
|
|||||||
)
|
)
|
||||||
|
|
||||||
PipelineProgress(pipeline_context).step("downloading")
|
PipelineProgress(pipeline_context).step("downloading")
|
||||||
debug(f"Starting download with 5-minute timeout...")
|
debug(f"Starting download for {url} (format: {actual_format or 'default'}) with {download_timeout_seconds}s activity timeout...")
|
||||||
result_obj = _download_with_timeout(opts, timeout_seconds=download_timeout_seconds)
|
result_obj = _download_with_timeout(opts, timeout_seconds=download_timeout_seconds)
|
||||||
debug(f"Download completed, building pipe object...")
|
debug(f"Download completed for {url}, building pipe object...")
|
||||||
break
|
break
|
||||||
except DownloadError as e:
|
except DownloadError as e:
|
||||||
cause = getattr(e, "__cause__", None)
|
cause = getattr(e, "__cause__", None)
|
||||||
@@ -1816,14 +1850,21 @@ class Download_File(Cmdlet):
|
|||||||
debug(f"Output directory: {final_output_dir}")
|
debug(f"Output directory: {final_output_dir}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# If we are already in a pipeline stage, the parent UI is already handling progress.
|
||||||
|
# Calling ensure_local_ui can cause re-initialization hangs on some platforms.
|
||||||
|
if pipeline_context.get_stage_context() is None:
|
||||||
|
debug("[download-file] Initializing local UI...")
|
||||||
PipelineProgress(pipeline_context).ensure_local_ui(
|
PipelineProgress(pipeline_context).ensure_local_ui(
|
||||||
label="download-file",
|
label="download-file",
|
||||||
total_items=len(supported_url),
|
total_items=len(supported_url),
|
||||||
items_preview=supported_url,
|
items_preview=supported_url,
|
||||||
)
|
)
|
||||||
except Exception:
|
else:
|
||||||
pass
|
debug("[download-file] Skipping local UI: running inside pipeline stage")
|
||||||
|
except Exception as e:
|
||||||
|
debug(f"[download-file] PipelineProgress update error: {e}")
|
||||||
|
|
||||||
|
debug("[download-file] Parsing clip and query specs...")
|
||||||
clip_spec = parsed.get("clip")
|
clip_spec = parsed.get("clip")
|
||||||
query_spec = parsed.get("query")
|
query_spec = parsed.get("query")
|
||||||
|
|
||||||
@@ -1914,6 +1955,7 @@ class Download_File(Cmdlet):
|
|||||||
|
|
||||||
if query_format and not query_wants_audio:
|
if query_format and not query_wants_audio:
|
||||||
try:
|
try:
|
||||||
|
debug(f"[download-file] Resolving numeric format for {candidate_url}...")
|
||||||
idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
|
idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
log(f"Error parsing format selection: {e}", file=sys.stderr)
|
log(f"Error parsing format selection: {e}", file=sys.stderr)
|
||||||
@@ -1923,6 +1965,7 @@ class Download_File(Cmdlet):
|
|||||||
ytdl_format = idx_fmt
|
ytdl_format = idx_fmt
|
||||||
|
|
||||||
if not ytdl_format:
|
if not ytdl_format:
|
||||||
|
debug(f"[download-file] Checking for playlist at {candidate_url}...")
|
||||||
if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
|
if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
|
||||||
playlist_selection_handled = True
|
playlist_selection_handled = True
|
||||||
try:
|
try:
|
||||||
@@ -1996,6 +2039,7 @@ class Download_File(Cmdlet):
|
|||||||
forced_single_format_id = None
|
forced_single_format_id = None
|
||||||
forced_single_format_for_batch = False
|
forced_single_format_for_batch = False
|
||||||
|
|
||||||
|
debug(f"[download-file] Checking if format table should be shown...")
|
||||||
early_ret = self._maybe_show_format_table_for_single_url(
|
early_ret = self._maybe_show_format_table_for_single_url(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
clip_spec=clip_spec,
|
clip_spec=clip_spec,
|
||||||
@@ -2023,6 +2067,7 @@ class Download_File(Cmdlet):
|
|||||||
except Exception:
|
except Exception:
|
||||||
timeout_seconds = 300
|
timeout_seconds = 300
|
||||||
|
|
||||||
|
debug(f"[download-file] Proceeding to final download call for {len(supported_url)} URL(s)...")
|
||||||
return self._download_supported_urls(
|
return self._download_supported_urls(
|
||||||
supported_url=supported_url,
|
supported_url=supported_url,
|
||||||
ytdlp_tool=ytdlp_tool,
|
ytdlp_tool=ytdlp_tool,
|
||||||
@@ -2693,14 +2738,17 @@ class Download_File(Cmdlet):
|
|||||||
config["_skip_direct_on_streaming_failure"] = True
|
config["_skip_direct_on_streaming_failure"] = True
|
||||||
|
|
||||||
if isinstance(config, dict) and config.get("_pipeobject_timeout_seconds") is None:
|
if isinstance(config, dict) and config.get("_pipeobject_timeout_seconds") is None:
|
||||||
config["_pipeobject_timeout_seconds"] = 60
|
# Use a generous default for individual items
|
||||||
|
config["_pipeobject_timeout_seconds"] = 600
|
||||||
|
|
||||||
successes = 0
|
successes = 0
|
||||||
failures = 0
|
failures = 0
|
||||||
last_code = 0
|
last_code = 0
|
||||||
for run_args in selection_runs:
|
total_selection = len(selection_runs)
|
||||||
debug(f"[ytdlp] Detected selection args from table selection: {run_args}")
|
debug(f"[download-file] Processing {total_selection} selected item(s) from table...")
|
||||||
debug(f"[ytdlp] Re-invoking download-file with: {run_args}")
|
for idx, run_args in enumerate(selection_runs, 1):
|
||||||
|
debug(f"[download-file] Item {idx}/{total_selection}: {run_args}")
|
||||||
|
debug(f"[download-file] Re-invoking download-file for selected item...")
|
||||||
exit_code = self._run_impl(None, run_args, config)
|
exit_code = self._run_impl(None, run_args, config)
|
||||||
if exit_code == 0:
|
if exit_code == 0:
|
||||||
successes += 1
|
successes += 1
|
||||||
|
|||||||
@@ -200,7 +200,8 @@ def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
|
|||||||
date_str = _extract_date(started)
|
date_str = _extract_date(started)
|
||||||
start_time = _format_event_timestamp(started)
|
start_time = _format_event_timestamp(started)
|
||||||
end_time = _format_event_timestamp(ended)
|
end_time = _format_event_timestamp(ended)
|
||||||
worker_id = str(worker.get("worker_id") or worker.get("id") or "unknown")
|
worker_id_value = worker.get("worker_id") or worker.get("id")
|
||||||
|
worker_id = str(worker_id_value) if worker_id_value is not None else ""
|
||||||
status = str(worker.get("status") or "unknown")
|
status = str(worker.get("status") or "unknown")
|
||||||
result_state = str(worker.get("result") or "")
|
result_state = str(worker.get("result") or "")
|
||||||
status_label = status
|
status_label = status
|
||||||
@@ -223,11 +224,17 @@ def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
|
|||||||
if description and description != error_message:
|
if description and description != error_message:
|
||||||
columns.append(("Details", description[:200]))
|
columns.append(("Details", description[:200]))
|
||||||
|
|
||||||
|
selection_args = None
|
||||||
|
if worker_id:
|
||||||
|
selection_args = ["-id", worker_id]
|
||||||
item = {
|
item = {
|
||||||
"columns": columns,
|
"columns": columns,
|
||||||
"__worker_metadata": worker,
|
"__worker_metadata": worker,
|
||||||
"_selection_args": ["-id", worker.get("worker_id")],
|
"worker_id": worker_id,
|
||||||
}
|
}
|
||||||
|
if selection_args:
|
||||||
|
item["_selection_args"] = list(selection_args)
|
||||||
|
item["selection_args"] = list(selection_args)
|
||||||
ctx.emit(item)
|
ctx.emit(item)
|
||||||
log(
|
log(
|
||||||
f"Worker {worker_id[:8]} status={status_label} pipe={pipe_display} "
|
f"Worker {worker_id[:8]} status={status_label} pipe={pipe_display} "
|
||||||
@@ -282,8 +289,38 @@ def _resolve_worker_record(db, payload: Any) -> Dict[str, Any] | None:
|
|||||||
|
|
||||||
|
|
||||||
def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
|
def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
|
||||||
stdout_content = worker.get("stdout", "") or ""
|
rows_emitted = False
|
||||||
|
|
||||||
|
def _emit_columns(columns: List[tuple[str, str]]) -> None:
|
||||||
|
nonlocal rows_emitted
|
||||||
|
payload = {
|
||||||
|
"columns": columns,
|
||||||
|
"_skip_metadata_propagation": True,
|
||||||
|
}
|
||||||
|
ctx.emit(payload)
|
||||||
|
rows_emitted = True
|
||||||
|
|
||||||
|
if events:
|
||||||
|
for event in events:
|
||||||
|
message = _normalize_text(event.get("message"))
|
||||||
|
if not message:
|
||||||
|
continue
|
||||||
|
|
||||||
|
level = _normalize_text(event.get("event_type") or event.get("channel") or "INFO")
|
||||||
|
step = _normalize_text(event.get("step"))
|
||||||
|
if step:
|
||||||
|
message = f"[{step}] {message}"
|
||||||
|
|
||||||
|
timestamp = _format_event_timestamp(event.get("created_at") or "")
|
||||||
|
|
||||||
|
_emit_columns([
|
||||||
|
("Time", timestamp),
|
||||||
|
("Level", level or "INFO"),
|
||||||
|
("Message", message),
|
||||||
|
])
|
||||||
|
|
||||||
|
if not rows_emitted:
|
||||||
|
stdout_content = worker.get("stdout", "") or ""
|
||||||
lines = stdout_content.splitlines()
|
lines = stdout_content.splitlines()
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
@@ -310,19 +347,23 @@ def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) ->
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
item = {
|
_emit_columns([
|
||||||
"columns": [
|
("Time", timestamp),
|
||||||
("Time",
|
("Level", level),
|
||||||
timestamp),
|
("Message", message),
|
||||||
("Level",
|
])
|
||||||
level),
|
|
||||||
("Message",
|
|
||||||
message),
|
|
||||||
]
|
|
||||||
}
|
|
||||||
ctx.emit(item)
|
|
||||||
|
|
||||||
# Events are already always derived from stdout for now.
|
if not rows_emitted:
|
||||||
|
fallback = (
|
||||||
|
_normalize_text(worker.get("error_message"))
|
||||||
|
or _normalize_text(worker.get("description"))
|
||||||
|
or "No log output captured for this worker."
|
||||||
|
)
|
||||||
|
_emit_columns([
|
||||||
|
("Time", ""),
|
||||||
|
("Level", "INFO"),
|
||||||
|
("Message", fallback),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
def _summarize_pipe(pipe_value: Any, limit: int = 200) -> str:
|
def _summarize_pipe(pipe_value: Any, limit: int = 200) -> str:
|
||||||
|
|||||||
@@ -145,6 +145,7 @@ def list_formats(
|
|||||||
no_playlist: bool = False,
|
no_playlist: bool = False,
|
||||||
playlist_items: Optional[str] = None,
|
playlist_items: Optional[str] = None,
|
||||||
cookiefile: Optional[str] = None,
|
cookiefile: Optional[str] = None,
|
||||||
|
timeout_seconds: int = 20,
|
||||||
) -> Optional[List[Dict[str, Any]]]:
|
) -> Optional[List[Dict[str, Any]]]:
|
||||||
"""Get available formats for a URL.
|
"""Get available formats for a URL.
|
||||||
|
|
||||||
@@ -154,6 +155,10 @@ def list_formats(
|
|||||||
if not is_url_supported_by_ytdlp(url):
|
if not is_url_supported_by_ytdlp(url):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
result_container: List[Optional[Any]] = [None, None] # [result, error]
|
||||||
|
|
||||||
|
def _do_list() -> None:
|
||||||
|
try:
|
||||||
ensure_yt_dlp_ready()
|
ensure_yt_dlp_ready()
|
||||||
assert yt_dlp is not None
|
assert yt_dlp is not None
|
||||||
|
|
||||||
@@ -162,6 +167,8 @@ def list_formats(
|
|||||||
"no_warnings": True,
|
"no_warnings": True,
|
||||||
"skip_download": True,
|
"skip_download": True,
|
||||||
"noprogress": True,
|
"noprogress": True,
|
||||||
|
"socket_timeout": min(10, max(1, int(timeout_seconds))),
|
||||||
|
"retries": 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
if cookiefile:
|
if cookiefile:
|
||||||
@@ -175,26 +182,40 @@ def list_formats(
|
|||||||
if playlist_items:
|
if playlist_items:
|
||||||
ydl_opts["playlist_items"] = str(playlist_items)
|
ydl_opts["playlist_items"] = str(playlist_items)
|
||||||
|
|
||||||
try:
|
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
except Exception as exc:
|
|
||||||
debug(f"yt-dlp format probe failed for {url}: {exc}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not isinstance(info, dict):
|
if not isinstance(info, dict):
|
||||||
return None
|
result_container[0] = None
|
||||||
|
return
|
||||||
|
|
||||||
formats = info.get("formats")
|
formats = info.get("formats")
|
||||||
if not isinstance(formats, list):
|
if not isinstance(formats, list):
|
||||||
return None
|
result_container[0] = None
|
||||||
|
return
|
||||||
|
|
||||||
out: List[Dict[str, Any]] = []
|
out: List[Dict[str, Any]] = []
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
if isinstance(fmt, dict):
|
if isinstance(fmt, dict):
|
||||||
out.append(fmt)
|
out.append(fmt)
|
||||||
|
result_container[0] = out
|
||||||
|
except Exception as exc:
|
||||||
|
debug(f"yt-dlp format probe failed for {url}: {exc}")
|
||||||
|
result_container[1] = exc
|
||||||
|
|
||||||
return out
|
# Use daemon=True so a hung thread doesn't block process exit
|
||||||
|
thread = threading.Thread(target=_do_list, daemon=True)
|
||||||
|
thread.start()
|
||||||
|
thread.join(timeout=max(1, int(timeout_seconds)))
|
||||||
|
|
||||||
|
if thread.is_alive():
|
||||||
|
debug(f"yt-dlp format probe timed out for {url} (>={timeout_seconds}s)")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if result_container[1] is not None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return cast(Optional[List[Dict[str, Any]]], result_container[0])
|
||||||
|
|
||||||
|
|
||||||
def probe_url(
|
def probe_url(
|
||||||
@@ -216,6 +237,7 @@ def probe_url(
|
|||||||
|
|
||||||
def _do_probe() -> None:
|
def _do_probe() -> None:
|
||||||
try:
|
try:
|
||||||
|
debug(f"[probe] Starting probe for {url}")
|
||||||
ensure_yt_dlp_ready()
|
ensure_yt_dlp_ready()
|
||||||
|
|
||||||
assert yt_dlp is not None
|
assert yt_dlp is not None
|
||||||
@@ -235,7 +257,9 @@ def probe_url(
|
|||||||
ydl_opts["noplaylist"] = True
|
ydl_opts["noplaylist"] = True
|
||||||
|
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||||
|
debug(f"[probe] ytdlp extract_info (download=False) start: {url}")
|
||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
|
debug(f"[probe] ytdlp extract_info (download=False) done: {url}")
|
||||||
|
|
||||||
if not isinstance(info, dict):
|
if not isinstance(info, dict):
|
||||||
result_container[0] = None
|
result_container[0] = None
|
||||||
@@ -258,7 +282,8 @@ def probe_url(
|
|||||||
debug(f"Probe error for {url}: {exc}")
|
debug(f"Probe error for {url}: {exc}")
|
||||||
result_container[1] = exc
|
result_container[1] = exc
|
||||||
|
|
||||||
thread = threading.Thread(target=_do_probe, daemon=False)
|
# Use daemon=True so a hung probe doesn't block the process
|
||||||
|
thread = threading.Thread(target=_do_probe, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
thread.join(timeout=timeout_seconds)
|
thread.join(timeout=timeout_seconds)
|
||||||
|
|
||||||
@@ -1194,6 +1219,7 @@ except ImportError:
|
|||||||
def download_media(opts: DownloadOptions, *, debug_logger: Optional[DebugLogger] = None) -> Any:
|
def download_media(opts: DownloadOptions, *, debug_logger: Optional[DebugLogger] = None) -> Any:
|
||||||
"""Download streaming media exclusively via yt-dlp."""
|
"""Download streaming media exclusively via yt-dlp."""
|
||||||
|
|
||||||
|
debug(f"[download_media] start: {opts.url}")
|
||||||
try:
|
try:
|
||||||
netloc = urlparse(opts.url).netloc.lower()
|
netloc = urlparse(opts.url).netloc.lower()
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -1536,20 +1562,37 @@ def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) ->
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
result_container[1] = exc
|
result_container[1] = exc
|
||||||
|
|
||||||
thread = threading.Thread(target=_do_download, daemon=False)
|
# Use daemon=True so a hung download doesn't block process exit if the wall timeout hits.
|
||||||
|
thread = threading.Thread(target=_do_download, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
|
|
||||||
|
# We use two timeouts:
|
||||||
|
# 1. Activity timeout (no progress updates for X seconds)
|
||||||
|
# 2. Hard wall-clock timeout (total time for this URL)
|
||||||
|
# The wall-clock timeout is slightly larger than the activity timeout
|
||||||
|
# to allow for slow-but-steady progress, up to a hard cap (e.g. 10 minutes).
|
||||||
|
wall_timeout = max(timeout_seconds * 2, 600)
|
||||||
|
|
||||||
_record_progress_activity(start_time)
|
_record_progress_activity(start_time)
|
||||||
try:
|
try:
|
||||||
while thread.is_alive():
|
while thread.is_alive():
|
||||||
thread.join(1)
|
thread.join(1)
|
||||||
if not thread.is_alive():
|
if not thread.is_alive():
|
||||||
break
|
break
|
||||||
|
|
||||||
|
now = time.monotonic()
|
||||||
|
|
||||||
|
# Check activity timeout
|
||||||
last_activity = _get_last_progress_activity()
|
last_activity = _get_last_progress_activity()
|
||||||
if last_activity <= 0:
|
if last_activity <= 0:
|
||||||
last_activity = start_time
|
last_activity = start_time
|
||||||
if time.monotonic() - last_activity > timeout_seconds:
|
if now - last_activity > timeout_seconds:
|
||||||
raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}")
|
raise DownloadError(f"Download activity timeout after {timeout_seconds} seconds for {opts.url}")
|
||||||
|
|
||||||
|
# Check hard wall-clock timeout
|
||||||
|
if now - start_time > wall_timeout:
|
||||||
|
raise DownloadError(f"Download hard timeout after {wall_timeout} seconds for {opts.url}")
|
||||||
finally:
|
finally:
|
||||||
_clear_progress_activity()
|
_clear_progress_activity()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user