j
This commit is contained in:
@@ -25,7 +25,7 @@ from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, Progress
|
||||
from SYS.utils import ensure_directory, sha256_file
|
||||
|
||||
try: # Optional; used for metadata extraction when available
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
from SYS.yt_metadata import extract_ytdlp_tags
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
@@ -56,7 +56,7 @@ def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
|
||||
mod = sys.modules.get(mod_name)
|
||||
if mod is None:
|
||||
mod = __import__(mod_name)
|
||||
except Exception:
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
return None
|
||||
|
||||
# Common APIs that return a bundle path
|
||||
|
||||
@@ -353,7 +353,7 @@
|
||||
"filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})"
|
||||
],
|
||||
"regexp": "filedot\\.(xyz|to|top)/([0-9a-zA-Z]{12})",
|
||||
"status": false
|
||||
"status": true
|
||||
},
|
||||
"filefactory": {
|
||||
"name": "filefactory",
|
||||
@@ -622,7 +622,7 @@
|
||||
"(simfileshare\\.net/download/[0-9]+/)"
|
||||
],
|
||||
"regexp": "(simfileshare\\.net/download/[0-9]+/)",
|
||||
"status": false
|
||||
"status": true
|
||||
},
|
||||
"streamtape": {
|
||||
"name": "streamtape",
|
||||
|
||||
@@ -852,7 +852,7 @@ class YtdlpMetadataProvider(MetadataProvider):
|
||||
|
||||
tags: List[str] = []
|
||||
try:
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
from SYS.yt_metadata import extract_ytdlp_tags
|
||||
except Exception:
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from SYS.logger import log, debug
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from API.HydrusNetwork import apply_hydrus_tag_mutation, fetch_hydrus_metadata, fetch_hydrus_metadata_by_url
|
||||
from SYS.yt_metadata import extract_ytdlp_tags
|
||||
|
||||
try: # Optional; used when available for richer metadata fetches
|
||||
import yt_dlp
|
||||
@@ -918,72 +918,7 @@ def apply_tag_mutation(payload: Dict[str,
|
||||
}
|
||||
|
||||
|
||||
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
||||
""" """
|
||||
tags: List[str] = []
|
||||
seen_namespaces: Set[str] = set()
|
||||
|
||||
# Meaningful yt-dlp fields that should become tags
|
||||
# This mapping excludes technical fields: filesize, duration, format_id, vcodec, acodec, ext, etc.
|
||||
field_to_namespace = {
|
||||
"artist": "artist",
|
||||
"album": "album",
|
||||
"creator": "creator",
|
||||
"uploader": "creator", # Map uploader to creator (deduplicate)
|
||||
"uploader_id": "creator",
|
||||
"channel": "channel",
|
||||
"genre": "genre",
|
||||
"track": "track",
|
||||
"track_number": "track_number",
|
||||
"release_date": "release_date",
|
||||
"upload_date": "upload_date",
|
||||
"title": "title",
|
||||
"license": "license",
|
||||
"location": "location",
|
||||
}
|
||||
|
||||
# Extract simple field mappings
|
||||
for yt_field, namespace in field_to_namespace.items():
|
||||
value = entry.get(yt_field)
|
||||
if value is not None:
|
||||
value_str = value_normalize(str(value))
|
||||
if value_str:
|
||||
# Prevent duplicate creator tags (only use first creator)
|
||||
if namespace == "creator":
|
||||
if "creator" in seen_namespaces:
|
||||
continue
|
||||
seen_namespaces.add("creator")
|
||||
|
||||
_add_tag(tags, namespace, value_str)
|
||||
|
||||
# Handle tags field specially (could be list, dict, or string)
|
||||
# For list/sequence tags, capture as freeform (no namespace prefix)
|
||||
tags_field = entry.get("tags")
|
||||
if tags_field is not None:
|
||||
if isinstance(tags_field, list):
|
||||
# Tags is list: ["tag1", "tag2", ...] → capture as freeform tags (no "tag:" prefix)
|
||||
# These are typically genre/category tags from the source (BandCamp genres, etc.)
|
||||
for tag_value in tags_field:
|
||||
if tag_value:
|
||||
normalized = value_normalize(str(tag_value))
|
||||
if normalized and normalized not in tags:
|
||||
tags.append(normalized)
|
||||
elif isinstance(tags_field, dict):
|
||||
# Tags is dict: {"key": "val"} → tag:key:val
|
||||
for key, val in tags_field.items():
|
||||
if key and val:
|
||||
key_normalized = value_normalize(str(key))
|
||||
val_normalized = value_normalize(str(val))
|
||||
if key_normalized and val_normalized:
|
||||
_add_tag(tags, f"tag:{key_normalized}", val_normalized)
|
||||
else:
|
||||
# Tags is string or other: add as freeform
|
||||
if tags_field:
|
||||
normalized = value_normalize(str(tags_field))
|
||||
if normalized and normalized not in tags:
|
||||
tags.append(normalized)
|
||||
|
||||
return tags
|
||||
|
||||
|
||||
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import datetime
|
||||
import hashlib
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
@@ -996,13 +997,22 @@ class PipelineLiveProgress:
|
||||
def stop(self) -> None:
|
||||
# Safe to call whether Live is running or paused.
|
||||
if self._live is not None:
|
||||
stop_fn = self._live.stop
|
||||
has_clear = False
|
||||
try:
|
||||
signature = inspect.signature(stop_fn)
|
||||
has_clear = "clear" in signature.parameters
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
try:
|
||||
self._live.stop(clear=True)
|
||||
except TypeError:
|
||||
self._live.stop()
|
||||
if has_clear:
|
||||
stop_fn(clear=True)
|
||||
else:
|
||||
stop_fn()
|
||||
except Exception:
|
||||
self._live.stop()
|
||||
try:
|
||||
stop_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -2313,6 +2313,9 @@ class PipelineExecutor:
|
||||
if name in {"get-relationship",
|
||||
"get-rel"}:
|
||||
continue
|
||||
if name in {"get-metadata",
|
||||
"meta"}:
|
||||
continue
|
||||
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
|
||||
# for it because it doesn't meaningfully "complete" (mpv may keep running)
|
||||
# and Live output interferes with MPV playlist UI.
|
||||
|
||||
102
SYS/yt_metadata.py
Normal file
102
SYS/yt_metadata.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import re
|
||||
from typing import Any, Dict, List, Set
|
||||
|
||||
|
||||
def value_normalize(value: Any) -> str:
    """Return *value* as a trimmed, lower-cased string.

    ``None`` yields an empty string instead of the literal text ``"none"``,
    so a missing value can never turn into a tag. Any other object is
    converted with ``str()`` first; empty or whitespace-only text also
    yields ``""``.
    """
    if value is None:
        return ""
    return str(value).strip().lower()
|
||||
|
||||
|
||||
def _add_tag(tags: List[str], namespace: str, value: str) -> None:
    """Append ``namespace:value`` to *tags* unless that exact tag is present.

    The value is passed through ``value_normalize`` before the tag is built.
    Nothing is appended when the namespace is empty, or when the value is
    empty before or after normalization. *tags* is modified in place.
    """
    if namespace and value:
        cleaned = value_normalize(value)
        if cleaned:
            tag = f"{namespace}:{cleaned}"
            if tag not in tags:
                tags.append(tag)
|
||||
|
||||
|
||||
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
    """Build a de-duplicated tag list from a yt-dlp info dictionary.

    Tags are produced in this order:

    1. ``namespace:value`` tags for the descriptive fields in the mapping
       below. ``creator``, ``uploader`` and ``uploader_id`` all feed the
       ``creator`` namespace; only the first of them with a non-empty value
       is used.
    2. The ``tags`` field: a list, tuple or set contributes one freeform tag
       per truthy element; a dict contributes ``tag:<key>:<value>`` tags; any
       other value is stringified and split on commas and whitespace into
       freeform tags.
    3. The ``title`` of every dict found in the ``chapters`` list, as
       freeform tags.

    Every value goes through ``value_normalize``; empty results are skipped.

    Args:
        entry: Metadata dictionary to read. It is not modified.

    Returns:
        The tags in first-seen order, without duplicates.
    """
    tags: List[str] = []

    def add_freeform(raw: Any) -> None:
        # Freeform tags carry no namespace prefix; skip blanks and repeats.
        normalized = value_normalize(str(raw))
        if normalized and normalized not in tags:
            tags.append(normalized)

    # Meaningful yt-dlp fields that should become tags.
    # Technical fields (filesize, duration, format_id, vcodec, acodec, ext, ...)
    # are deliberately left out of this mapping.
    field_to_namespace = {
        "artist": "artist",
        "album": "album",
        "creator": "creator",
        "uploader": "creator",  # Map uploader to creator (deduplicate)
        "uploader_id": "creator",
        "channel": "channel",
        "genre": "genre",
        "track": "track",
        "track_number": "track_number",
        "release_date": "release_date",
        "upload_date": "upload_date",
        "title": "title",
        "license": "license",
        "location": "location",
    }

    # Extract simple field mappings.
    creator_seen = False
    for yt_field, namespace in field_to_namespace.items():
        value = entry.get(yt_field)
        if value is None:
            continue
        value_str = value_normalize(str(value))
        if not value_str:
            continue
        if namespace == "creator":
            # Several fields map to "creator"; keep only the first one found.
            if creator_seen:
                continue
            creator_seen = True
        _add_tag(tags, namespace, value_str)

    # The tags field may be a sequence, a dict, or a plain string.
    tags_field = entry.get("tags")
    if isinstance(tags_field, (list, tuple, set, frozenset)):
        # Sequence of tags -> freeform tags (no "tag:" prefix). Sets are
        # sorted so the output order does not depend on hash ordering.
        items = tags_field
        if isinstance(tags_field, (set, frozenset)):
            items = sorted(tags_field, key=str)
        for tag_value in items:
            if tag_value:
                add_freeform(tag_value)
    elif isinstance(tags_field, dict):
        # Tags is dict: {"key": "val"} -> tag:key:val
        for key, val in tags_field.items():
            if key and val:
                key_normalized = value_normalize(str(key))
                val_normalized = value_normalize(str(val))
                if key_normalized and val_normalized:
                    _add_tag(tags, f"tag:{key_normalized}", val_normalized)
    elif tags_field is not None:
        # Tags is a string (or other scalar): "tag1,tag2" -> split on commas
        # and whitespace, each piece becoming a freeform tag.
        for token in re.split(r'[,\s]+', str(tags_field).strip()):
            if token:
                add_freeform(token)

    # Chapter titles become freeform tags as well.
    chapters = entry.get("chapters")
    if chapters and isinstance(chapters, list):
        for chapter in chapters:
            if isinstance(chapter, dict):
                title = chapter.get("title")
                if title:
                    add_freeform(title)

    return tags
|
||||
@@ -358,10 +358,25 @@ def _emit_tags_as_table(
|
||||
# Store the table and items in history so @.. works to go back
|
||||
# Use overlay mode so it doesn't push the previous search to history stack
|
||||
# This makes get-tag behave like a transient view
|
||||
table_applied = False
|
||||
try:
|
||||
ctx.set_last_result_table_overlay(table, tag_items, subject)
|
||||
table_applied = True
|
||||
except AttributeError:
|
||||
try:
|
||||
ctx.set_last_result_table(table, tag_items, subject)
|
||||
table_applied = True
|
||||
except Exception:
|
||||
table_applied = False
|
||||
except Exception:
|
||||
table_applied = False
|
||||
|
||||
if table_applied:
|
||||
try:
|
||||
if hasattr(ctx, "set_current_stage_table"):
|
||||
ctx.set_current_stage_table(table)
|
||||
except Exception:
|
||||
pass
|
||||
# Note: CLI will handle displaying the table via ResultTable formatting
|
||||
|
||||
|
||||
@@ -776,7 +791,7 @@ def _scrape_url_metadata(
|
||||
import json as json_module
|
||||
|
||||
try:
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
from SYS.yt_metadata import extract_ytdlp_tags
|
||||
except ImportError:
|
||||
extract_ytdlp_tags = None
|
||||
|
||||
@@ -1613,6 +1628,33 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
try:
|
||||
display_subject = ctx.get_last_result_subject()
|
||||
except Exception:
|
||||
display_subject = None
|
||||
|
||||
def _value_has_content(value: Any) -> bool:
|
||||
if value is None:
|
||||
return False
|
||||
if isinstance(value, str):
|
||||
return bool(value.strip())
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
return len(value) > 0
|
||||
return True
|
||||
|
||||
def _resolve_subject_value(*keys: str) -> Any:
    """Return the first non-empty field among *keys*.

    ``result`` is searched first, trying the keys in the order given. If none
    of them has content and ``display_subject`` is not ``None``, the same keys
    are tried on ``display_subject``. Returns ``None`` when nothing is found.
    """
    sources = [result]
    if display_subject is not None:
        sources.append(display_subject)

    for source in sources:
        for key in keys:
            candidate = get_field(source, key, None)
            if _value_has_content(candidate):
                return candidate
    return None
|
||||
|
||||
# If the current result already carries a tag list (e.g. a selected metadata
|
||||
# row from get-tag -scrape itunes), APPLY those tags to the file in the store.
|
||||
result_provider = get_field(result, "provider", None)
|
||||
@@ -1726,7 +1768,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
)
|
||||
return 0
|
||||
|
||||
hash_from_result = normalize_hash(get_field(result, "hash", None))
|
||||
hash_from_result = normalize_hash(_resolve_subject_value("hash"))
|
||||
file_hash = hash_override or hash_from_result
|
||||
# Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline
|
||||
# This allows interactive REPL to work even in pipelines
|
||||
@@ -1734,7 +1776,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
store_label = store_key.strip() if store_key and store_key.strip() else None
|
||||
|
||||
# Get hash and store from result
|
||||
store_name = get_field(result, "store")
|
||||
store_value = _resolve_subject_value("store")
|
||||
store_name = str(store_value).strip() if store_value is not None else None
|
||||
|
||||
if not file_hash:
|
||||
log("No hash available in result", file=sys.stderr)
|
||||
@@ -1744,6 +1787,68 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
log("No store specified in result", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
item_title = (
|
||||
_resolve_subject_value("title", "name", "filename")
|
||||
)
|
||||
subject_store = store_name
|
||||
subject_path_value = (
|
||||
_resolve_subject_value("path", "target", "filename")
|
||||
)
|
||||
subject_path = None
|
||||
if subject_path_value is not None:
|
||||
try:
|
||||
subject_path = str(subject_path_value)
|
||||
except Exception:
|
||||
subject_path = None
|
||||
|
||||
service_name = ""
|
||||
subject_payload_base: Dict[str, Any] = {
|
||||
"tag": [],
|
||||
"title": item_title,
|
||||
"name": item_title,
|
||||
"store": subject_store,
|
||||
"service_name": service_name,
|
||||
"extra": {
|
||||
"tag": [],
|
||||
},
|
||||
}
|
||||
if file_hash:
|
||||
subject_payload_base["hash"] = file_hash
|
||||
if subject_path:
|
||||
subject_payload_base["path"] = subject_path
|
||||
|
||||
def _subject_payload_with(
    tags: Sequence[str],
    service_name_override: Optional[str] = None,
) -> Dict[str, Any]:
    """Return a copy of ``subject_payload_base`` carrying *tags*.

    The copy is shallow. Its ``"tag"`` entry and the ``"tag"`` entry of a new
    ``"extra"`` dict each receive their own list built from *tags*.
    ``"service_name"`` is replaced only when *service_name_override* is not
    ``None``.
    """
    payload = {
        **subject_payload_base,
        "tag": list(tags),
        "extra": {"tag": list(tags)},
    }
    if service_name_override is not None:
        payload["service_name"] = service_name_override
    return payload
|
||||
|
||||
raw_result_tags = get_field(result, "tag", None)
|
||||
if not isinstance(raw_result_tags, list):
|
||||
raw_result_tags = get_field(result, "tags", None)
|
||||
display_tags: List[str] = []
|
||||
if isinstance(raw_result_tags, list):
|
||||
display_tags = [str(t) for t in raw_result_tags if t is not None]
|
||||
if display_tags and not emit_mode:
|
||||
subject_payload = _subject_payload_with(display_tags)
|
||||
_emit_tags_as_table(
|
||||
display_tags,
|
||||
file_hash=file_hash,
|
||||
store=str(subject_store),
|
||||
service_name=None,
|
||||
config=config,
|
||||
item_title=item_title,
|
||||
path=subject_path,
|
||||
subject=subject_payload,
|
||||
)
|
||||
return 0
|
||||
|
||||
# Get tags using storage backend
|
||||
try:
|
||||
from Store import Store
|
||||
@@ -1761,56 +1866,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
log(f"Failed to get tags: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Always output to ResultTable (pipeline mode only)
|
||||
# Extract title for table header
|
||||
item_title = (
|
||||
get_field(result,
|
||||
"title",
|
||||
None) or get_field(result,
|
||||
"name",
|
||||
None) or get_field(result,
|
||||
"filename",
|
||||
None)
|
||||
subject_payload = _subject_payload_with(
|
||||
current,
|
||||
service_name if source == "hydrus" else None,
|
||||
)
|
||||
|
||||
# Build a subject payload representing the file whose tags are being shown
|
||||
subject_store = get_field(result, "store", None) or store_name
|
||||
subject_path = (
|
||||
get_field(result,
|
||||
"path",
|
||||
None) or get_field(result,
|
||||
"target",
|
||||
None) or get_field(result,
|
||||
"filename",
|
||||
None)
|
||||
)
|
||||
subject_payload: Dict[str,
|
||||
Any] = {
|
||||
"tag": list(current),
|
||||
"title": item_title,
|
||||
"name": item_title,
|
||||
"store": subject_store,
|
||||
"service_name": service_name,
|
||||
"extra": {
|
||||
"tag": list(current),
|
||||
},
|
||||
}
|
||||
if file_hash:
|
||||
subject_payload["hash"] = file_hash
|
||||
if subject_path:
|
||||
try:
|
||||
subject_payload["path"] = str(subject_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_emit_tags_as_table(
|
||||
current,
|
||||
file_hash=file_hash,
|
||||
store=subject_store,
|
||||
store=str(subject_store),
|
||||
service_name=service_name if source == "hydrus" else None,
|
||||
config=config,
|
||||
item_title=item_title,
|
||||
path=str(subject_path) if subject_path else None,
|
||||
path=subject_path,
|
||||
subject=subject_payload,
|
||||
)
|
||||
|
||||
|
||||
123
tool/ytdlp.py
123
tool/ytdlp.py
@@ -27,7 +27,7 @@ from SYS.models import (
|
||||
)
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from SYS.utils import ensure_directory, sha256_file
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
from SYS.yt_metadata import extract_ytdlp_tags
|
||||
|
||||
_YTDLP_TRANSFER_STATE: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
@@ -44,6 +44,96 @@ else:
|
||||
|
||||
_EXTRACTOR_CACHE: List[Any] | None = None
|
||||
|
||||
# Patterns for domain extraction from yt-dlp regexes (applied to the regex
# source text, so an escaped dot appears as the two characters "\.").
# 1) Alternation group followed by \.tld e.g. (?:youtube|youtu|youtube-nocookie)\.com
ALT_GROUP_TLD = re.compile(r'\((?:\?:)?([^\)]+)\)\\\.(?P<tld>[A-Za-z0-9.+-]+)')
# 2) Literal domain pieces like youtube\.com or youtu\.be (not preceded by a group).
#    The leading (?<![A-Za-z0-9_-]) pins the name to a token boundary. Without
#    it the engine satisfies the other lookbehinds by starting one character
#    later and returns truncated names (":foo\.com" -> "oo.com").
LITERAL_DOMAIN = re.compile(
    r'(?<![A-Za-z0-9_-])(?<!\()(?<!\|)(?<!:)'
    r'([A-Za-z0-9][A-Za-z0-9_-]{0,})\\\.([A-Za-z0-9.+-]+)'
)
# 3) Partial domain tokens that appear alone (e.g., zhihu) — treat as zhihu.com fallback
PARTIAL_TOKEN = re.compile(r'(?<![A-Za-z0-9_-])([A-Za-z0-9][A-Za-z0-9_-]{1,})(?=(?:\\?[/\)\$]|\\\.|$))')
|
||||
|
||||
_SUPPORTED_DOMAINS: set[str] | None = None
|
||||
|
||||
|
||||
def normalize_patterns(valid_url) -> List[str]:
    """Coerce a ``_VALID_URL``-style value into a list of pattern strings.

    A non-empty string becomes a one-element list. A list or tuple is reduced
    to its ``str`` members, in order. Anything else, including every falsy
    value, gives an empty list.
    """
    if valid_url:
        if isinstance(valid_url, str):
            return [valid_url]
        if isinstance(valid_url, (list, tuple)):
            return [item for item in valid_url if isinstance(item, str)]
    return []
|
||||
|
||||
|
||||
def extract_from_pattern(pat: str) -> set[str]:
    """Collect candidate domain names from the source text of one URL regex.

    Three strategies are applied:

    1. ``ALT_GROUP_TLD``: each member of an alternation group that is followed
       by an escaped dot and a suffix becomes ``<member>.<suffix>``.
    2. ``LITERAL_DOMAIN``: each literal name/escaped-dot/suffix piece becomes
       ``<name>.<suffix>``.
    3. ``PARTIAL_TOKEN``: used only when the first two found nothing; bare
       tokens longer than two characters are guessed to be ``<token>.com``.

    Args:
        pat: Regex source text to scan.

    Returns:
        Lower-cased domain candidates; may be empty.
    """
    domains: set[str] = set()

    # 1) Alternation groups followed by .tld
    for alt_group, tld in ALT_GROUP_TLD.findall(pat):
        # alt_group like "youtube|youtu|youtube-nocookie"
        for alt in alt_group.split('|'):
            # Members are regex source, so a dot inside one is written "\.".
            # Turn it back into a plain dot. (The captured group can never
            # contain ")", so there is no nested "(?:www\.)?" to strip here.)
            host = alt.strip().replace('\\.', '.')
            if host:
                domains.add(f"{host}.{tld}".lower())

    # 2) Literal domain matches (youtube\.com)
    for name, tld in LITERAL_DOMAIN.findall(pat):
        domains.add(f"{name}.{tld}".lower())

    # 3) Partial tokens fallback (only if we didn't already capture domains).
    # This helps when regexes contain plain tokens like 'zhihu' or 'vimeo'
    # without .com
    if not domains:
        # URL scheme / prefix / suffix words that are not site names.
        not_site_names = {"https", "http", "www", "com", "net", "org"}
        for token in PARTIAL_TOKEN.findall(pat):
            lowered = token.lower()
            # Very short tokens are too ambiguous to guess from.
            if len(token) <= 2 or lowered in not_site_names:
                continue
            domains.add(f"{lowered}.com")

    return domains
|
||||
|
||||
|
||||
def extract_domains(valid_url) -> set[str]:
    """Return the domains found across every pattern in *valid_url*.

    *valid_url* is expanded with ``normalize_patterns`` and each pattern is
    scanned with ``extract_from_pattern``. Candidates whose last two
    dot-separated labels are identical (e.g. ``com.com``) are dropped as junk.
    """
    found: set[str] = set()
    for pattern in normalize_patterns(valid_url):
        found.update(extract_from_pattern(pattern))

    kept: set[str] = set()
    for domain in found:
        labels = domain.split('.')
        # Keep everything except names whose final two labels repeat.
        if len(labels) < 2 or labels[-2] != labels[-1]:
            kept.add(domain)
    return kept
|
||||
|
||||
|
||||
def _build_supported_domains() -> set[str]:
    """Return the cached set of domains derived from yt-dlp extractors.

    On the first call the set is built by running ``extract_domains`` over the
    ``_VALID_URL`` attribute of every extractor yielded by
    ``gen_extractors()``, skipping the extractor whose ``IE_NAME`` is
    ``"generic"``. The result is stored in the module-level
    ``_SUPPORTED_DOMAINS`` and returned unchanged on later calls.

    Building is best-effort: when ``gen_extractors`` is ``None`` the set stays
    empty, and an extractor that raises is skipped without discarding the
    domains gathered from the others.
    """
    global _SUPPORTED_DOMAINS
    if _SUPPORTED_DOMAINS is not None:
        return _SUPPORTED_DOMAINS

    domains: set[str] = set()
    if gen_extractors is not None:
        try:
            for extractor in gen_extractors():
                # Isolate each extractor so one bad entry cannot abort the
                # scan and leave a truncated set cached for the whole run.
                try:
                    name = str(getattr(extractor, "IE_NAME", "") or "").lower()
                    if name == "generic":
                        continue
                    regex = getattr(extractor, "_VALID_URL", None)
                    domains.update(extract_domains(regex))
                except Exception:
                    continue
        except Exception:
            # Enumerating extractors failed part-way; keep what was collected.
            pass

    _SUPPORTED_DOMAINS = domains
    return _SUPPORTED_DOMAINS
|
||||
|
||||
|
||||
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
|
||||
cur: Any = config
|
||||
@@ -122,16 +212,13 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
return False
|
||||
|
||||
try:
|
||||
for extractor in _get_extractors():
|
||||
try:
|
||||
if not extractor.suitable(url):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
name = getattr(extractor, "IE_NAME", "").lower()
|
||||
if name == "generic":
|
||||
continue
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
if not domain:
|
||||
return False
|
||||
supported = _build_supported_domains()
|
||||
for base in supported:
|
||||
if domain == base or domain.endswith("." + base):
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
@@ -593,16 +680,22 @@ class YtDlpTool:
|
||||
# Defaulting to 'chrome' as the most common path.
|
||||
base_options["cookiesfrombrowser"] = "chrome"
|
||||
|
||||
# Special handling for format keywords
|
||||
if opts.ytdl_format == "audio":
|
||||
opts = opts._replace(mode="audio", ytdl_format=None)
|
||||
elif opts.ytdl_format == "video":
|
||||
opts = opts._replace(mode="video", ytdl_format=None)
|
||||
|
||||
if opts.no_playlist:
|
||||
base_options["noplaylist"] = True
|
||||
|
||||
fmt = opts.ytdl_format or self.default_format(opts.mode)
|
||||
base_options["format"] = fmt
|
||||
|
||||
# if opts.mode == "audio":
|
||||
# base_options["postprocessors"] = [{
|
||||
# "key": "FFmpegExtractAudio"
|
||||
# }]
|
||||
if opts.mode == "audio":
|
||||
base_options["postprocessors"] = [{
|
||||
"key": "FFmpegExtractAudio"
|
||||
}]
|
||||
|
||||
if opts.mode != "audio":
|
||||
format_sort = self.defaults.format_sort or [
|
||||
|
||||
Reference in New Issue
Block a user