Added MHTML support and fixed some bugs in the process

This commit is contained in:
2026-04-22 21:19:55 -07:00
parent 90787bd0a2
commit 67c272db4b
9 changed files with 564 additions and 66 deletions
+92 -5
View File
@@ -613,6 +613,18 @@ def is_browseable_format(fmt: Any) -> bool:
if format_id.lower().startswith("sb"):
return False
protocol = str(fmt.get("protocol") or "").strip().lower()
size_bytes = fmt.get("filesize") or fmt.get("filesize_approx")
if (
protocol in {"m3u8", "m3u8_native"}
and re.fullmatch(r"\d+-\d+", format_id)
and not size_bytes
):
vcodec = str(fmt.get("vcodec", "none"))
acodec = str(fmt.get("acodec", "none"))
if vcodec != "none" and acodec != "none":
return False
# Filter out formats with no audio and no video
vcodec = str(fmt.get("vcodec", "none"))
@@ -620,6 +632,80 @@ def is_browseable_format(fmt: Any) -> bool:
return not (vcodec == "none" and acodec == "none")
def get_selection_format_id(
    fmt: Dict[str, Any],
    *,
    video_audio_suffix: str = "ba",
) -> str:
    """Build the selector string for a single format entry.

    Args:
        fmt: Format mapping; reads ``format_id``, ``vcodec`` and ``acodec``.
        video_audio_suffix: Selector appended after ``+`` to video-only
            formats. Pass an empty string to leave the id unsuffixed.

    Returns:
        ``""`` when ``format_id`` is missing or blank. Otherwise:

        * audio-only format whose id looks like ``<digits>-<alnum>``
          (e.g. ``"251-drc"``): just the numeric base (``"251"``);
        * video-only format with a non-empty suffix: ``"<id>+<suffix>"``;
        * anything else: the stripped ``format_id`` unchanged.
    """
    raw_id = str(fmt.get("format_id") or "").strip()
    if not raw_id:
        return ""

    # A codec counts as absent only when its string form is exactly "none".
    has_video = str(fmt.get("vcodec", "none")) != "none"
    has_audio = str(fmt.get("acodec", "none")) != "none"

    # Audio-only variants collapse to their numeric base id; the video-only
    # suffix can never apply to them, so return straight away.
    variant = re.fullmatch(r"(?P<base>\d+)-[A-Za-z0-9]+", raw_id)
    if variant is not None and has_audio and not has_video:
        return variant.group("base")

    if has_video and not has_audio and video_audio_suffix:
        return f"{raw_id}+{video_audio_suffix}"
    return raw_id
def get_display_format_id(fmt: Dict[str, Any]) -> str:
    """Return the format id to show for *fmt*.

    This is the selector computed by ``get_selection_format_id`` with an
    empty ``video_audio_suffix``, so nothing is appended after the id.
    Falls back to the stripped ``format_id`` if that selector is empty, and
    returns ``""`` when ``format_id`` is missing or blank.
    """
    raw_id = str(fmt.get("format_id") or "").strip()
    if raw_id:
        return get_selection_format_id(fmt, video_audio_suffix="") or raw_id
    return ""
def _picker_format_score(fmt: Dict[str, Any]) -> tuple[int, int, float]:
note = str(fmt.get("format_note") or fmt.get("format") or "").strip().lower()
format_id = str(fmt.get("format_id") or "").strip().lower()
prefers_original = 1 if ("original" in note or "default" in note) else 0
avoids_drc = 0 if ("-drc" in format_id or "drc" in note) else 1
magnitude = 0.0
for key in ("filesize", "filesize_approx", "abr", "tbr"):
value = fmt.get(key)
if isinstance(value, (int, float)):
magnitude = float(value)
break
if isinstance(value, str):
try:
magnitude = float(value.strip())
break
except Exception:
pass
return (prefers_original, avoids_drc, magnitude)
def collapse_picker_formats(
    formats: Sequence[Dict[str, Any]],
    *,
    video_audio_suffix: str = "ba",
) -> List[Dict[str, Any]]:
    """Reduce *formats* to one entry per selector id.

    Entries that are not dicts, that ``is_browseable_format`` rejects, or
    whose selector id is empty are dropped. Among entries sharing a selector
    id, the one with the strictly highest ``_picker_format_score`` is kept;
    on a tie the earlier entry stays.

    Args:
        formats: Candidate format mappings.
        video_audio_suffix: Forwarded to ``get_selection_format_id`` when
            computing each entry's selector id.

    Returns:
        The surviving format dicts, ordered by the first appearance of
        their selector id in *formats*.
    """
    # Dicts preserve insertion order and overwriting a key keeps its slot,
    # so this single mapping tracks both the winner and the output order.
    best_by_selector: Dict[str, Dict[str, Any]] = {}
    for candidate in formats:
        if not (isinstance(candidate, dict) and is_browseable_format(candidate)):
            continue
        key = get_selection_format_id(candidate, video_audio_suffix=video_audio_suffix)
        if not key:
            continue
        incumbent = best_by_selector.get(key)
        if incumbent is None or _picker_format_score(candidate) > _picker_format_score(incumbent):
            best_by_selector[key] = candidate
    return list(best_by_selector.values())
def format_for_table_selection(
fmt: Dict[str, Any],
url: str,
@@ -648,6 +734,7 @@ def format_for_table_selection(
results = [format_for_table_selection(f, url, i+1) for i, f in enumerate(browseable)]
"""
format_id = fmt.get("format_id", "")
display_format_id = get_display_format_id(fmt)
resolution = fmt.get("resolution", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "none")
@@ -657,10 +744,10 @@ def format_for_table_selection(
# If not provided, compute selection format ID (add +ba for video-only)
if selection_format_id is None:
selection_format_id = format_id
selection_format_id = get_selection_format_id(fmt, video_audio_suffix="ba")
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
if not selection_format_id and format_id:
selection_format_id = format_id
except Exception:
from SYS.logger import logger
logger.exception("Failed to compute selection_format_id for format: %s", fmt)
@@ -694,14 +781,14 @@ def format_for_table_selection(
# Build table row
return {
"table": "download-file",
"title": f"Format {format_id}",
"title": f"Format {display_format_id or format_id}",
"url": url,
"target": url,
"detail": format_desc,
"annotations": [ext, resolution] if resolution else [ext],
"media_kind": "format",
"columns": [
("ID", format_id),
("ID", display_format_id or format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),