f
This commit is contained in:
0
SYS/__init__.py
Normal file
0
SYS/__init__.py
Normal file
@@ -13,12 +13,14 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
# stubs if prompt_toolkit is not available so imports remain safe for testing.
|
||||
try:
|
||||
from prompt_toolkit.document import Document
|
||||
from prompt_toolkit.lexers import Lexer
|
||||
from prompt_toolkit.lexers import Lexer as _PTK_Lexer
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
Document = object # type: ignore
|
||||
# Fallback to a simple object when prompt_toolkit is not available
|
||||
_PTK_Lexer = object # type: ignore
|
||||
|
||||
class Lexer: # simple fallback base
|
||||
pass
|
||||
# Expose a stable name used by the rest of the module
|
||||
Lexer = _PTK_Lexer
|
||||
|
||||
|
||||
class SelectionSyntax:
|
||||
@@ -216,19 +218,19 @@ class SelectionFilterSyntax:
|
||||
if ":" in s:
|
||||
parts = [p.strip() for p in s.split(":")]
|
||||
if len(parts) == 2 and all(p.isdigit() for p in parts):
|
||||
m, sec = parts
|
||||
return max(0, int(m) * 60 + int(sec))
|
||||
m_str, sec_str = parts
|
||||
return max(0, int(m_str) * 60 + int(sec_str))
|
||||
if len(parts) == 3 and all(p.isdigit() for p in parts):
|
||||
h, m, sec = parts
|
||||
return max(0, int(h) * 3600 + int(m) * 60 + int(sec))
|
||||
h_str, m_str, sec_str = parts
|
||||
return max(0, int(h_str) * 3600 + int(m_str) * 60 + int(sec_str))
|
||||
|
||||
# token format: 1h2m3s (tokens can appear in any combination)
|
||||
total = 0
|
||||
found = False
|
||||
for m in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
|
||||
for match in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
|
||||
found = True
|
||||
n = int(m.group(1))
|
||||
unit = m.group(2).lower()
|
||||
n = int(match.group(1))
|
||||
unit = match.group(2).lower()
|
||||
if unit == "h":
|
||||
total += n * 3600
|
||||
elif unit == "m":
|
||||
|
||||
@@ -5,7 +5,7 @@ from __future__ import annotations
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, Optional, List
|
||||
from SYS.logger import log
|
||||
from SYS.utils import expand_path
|
||||
|
||||
@@ -722,10 +722,6 @@ def reload_config(
|
||||
return load_config(config_dir=config_dir, filename=filename)
|
||||
|
||||
|
||||
def clear_config_cache() -> None:
|
||||
_CONFIG_CACHE.clear()
|
||||
|
||||
|
||||
def _validate_config_safety(config: Dict[str, Any]) -> None:
|
||||
"""Check for dangerous configurations, like folder stores in non-empty dirs."""
|
||||
store = config.get("store")
|
||||
|
||||
@@ -220,11 +220,11 @@ def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Op
|
||||
|
||||
records: List[Dict[str, str]] = []
|
||||
for row in rows:
|
||||
nr: Dict[str, str] = {}
|
||||
row_norm: Dict[str, str] = {}
|
||||
for k, v in (row or {}).items():
|
||||
nk = normalize_header(str(k or ""))
|
||||
nr[nk] = (str(v).strip() if v is not None else "")
|
||||
records.append(nr)
|
||||
row_norm[nk] = (str(v).strip() if v is not None else "")
|
||||
records.append(row_norm)
|
||||
|
||||
# Attempt to recover hrefs by matching anchor text -> href
|
||||
try:
|
||||
@@ -265,11 +265,11 @@ def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Op
|
||||
# Normalize keys (map platform->system etc)
|
||||
normed: List[Dict[str, str]] = []
|
||||
for r in records:
|
||||
nr: Dict[str, str] = {}
|
||||
norm_row: Dict[str, str] = {}
|
||||
for k, v in (r or {}).items():
|
||||
nk = normalize_header(k)
|
||||
nr[nk] = v
|
||||
normed.append(nr)
|
||||
norm_row[nk] = v
|
||||
normed.append(norm_row)
|
||||
|
||||
return normed, chosen
|
||||
|
||||
|
||||
@@ -24,16 +24,16 @@ def _coerce_value(value: Any) -> str:
|
||||
if isinstance(value, bool):
|
||||
return "true" if value else "false"
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
parts = [_coerce_value(v) for v in value]
|
||||
cleaned = [part for part in parts if part]
|
||||
parts_list = [_coerce_value(v) for v in value]
|
||||
cleaned = [part for part in parts_list if part]
|
||||
return ", ".join(cleaned)
|
||||
if isinstance(value, dict):
|
||||
parts: List[str] = []
|
||||
dict_parts: List[str] = []
|
||||
for subkey, subvalue in value.items():
|
||||
part = _coerce_value(subvalue)
|
||||
if part:
|
||||
parts.append(f"{subkey}:{part}")
|
||||
return ", ".join(parts)
|
||||
dict_parts.append(f"{subkey}:{part}")
|
||||
return ", ".join(dict_parts)
|
||||
try:
|
||||
return str(value).strip()
|
||||
except Exception:
|
||||
|
||||
@@ -140,7 +140,7 @@ def debug_inspect(
|
||||
value=value,
|
||||
max_string=100_000,
|
||||
max_length=100_000,
|
||||
)
|
||||
) # type: ignore[call-arg]
|
||||
except TypeError:
|
||||
rich_inspect(
|
||||
obj,
|
||||
@@ -155,7 +155,6 @@ def debug_inspect(
|
||||
value=value,
|
||||
)
|
||||
|
||||
|
||||
def log(*args, **kwargs) -> None:
|
||||
"""Print with automatic file.function prefix.
|
||||
|
||||
|
||||
379
SYS/metadata.py
379
SYS/metadata.py
@@ -17,6 +17,14 @@ try: # Optional; used for IMDb lookup without API key
|
||||
from imdbinfo.services import search_title # type: ignore
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
search_title = None # type: ignore[assignment]
|
||||
try:
|
||||
import mutagen
|
||||
except ImportError:
|
||||
mutagen = None
|
||||
try:
|
||||
import musicbrainzngs
|
||||
except ImportError:
|
||||
musicbrainzngs = None
|
||||
|
||||
|
||||
def value_normalize(value: Any) -> str:
|
||||
@@ -93,6 +101,52 @@ def _sanitize_url(value: Optional[str]) -> Optional[str]:
|
||||
return cleaned
|
||||
|
||||
|
||||
def sanitize_metadata_value(value: Any) -> str:
    """Flatten *value* into a single-line string.

    ``None`` becomes ``""``. A list or tuple is reduced to its truthy
    elements, each converted with ``str`` and joined by ``", "``. Anything
    else is converted with ``str``. The text is then stripped of surrounding
    whitespace and every remaining newline / carriage return is replaced by
    a space.
    """
    if value is None:
        return ""

    if isinstance(value, (list, tuple)):
        text = ", ".join([str(element) for element in value if element])
    else:
        text = str(value)

    flattened = text.strip()
    # Strip happens first, so only line breaks *inside* the text are replaced.
    for line_break in ("\n", "\r"):
        flattened = flattened.replace(line_break, " ")
    return flattened
|
||||
|
||||
|
||||
def unique_preserve_order(items: Iterable[Any]) -> list[Any]:
    """Return the distinct elements of *items*, keeping first-seen order.

    Elements must be hashable; when two elements compare equal, the one
    encountered first is the one kept.
    """
    # dict keys are unique and remember insertion order, which is exactly
    # the "first occurrence wins" rule.
    return list(dict.fromkeys(items))
|
||||
|
||||
|
||||
def fetch_musicbrainz_tags(mbid: str, entity: str = "release") -> Dict[str, Any]:
    """Look up the tag names attached to a MusicBrainz entity.

    Args:
        mbid: Identifier passed to the ``musicbrainzngs`` lookup call.
        entity: ``"release"``, ``"recording"`` or ``"artist"``.

    Returns:
        ``{"tag": [...]}`` holding the ``"name"`` of every dict entry in the
        response's ``"tag-list"``. The list is empty when ``musicbrainzngs``
        is not available or *entity* is not one of the supported kinds. If
        the lookup raises, the error is reported through ``debug`` and the
        names collected so far are returned.
    """
    if not musicbrainzngs:
        return {"tag": []}

    musicbrainzngs.set_useragent("Medeia-Macina", "0.1")

    # (entity name, which is also the response key, lookup function name)
    lookups = (
        ("release", "get_release_by_id"),
        ("recording", "get_recording_by_id"),
        ("artist", "get_artist_by_id"),
    )
    names: list[str] = []
    try:
        for response_key, lookup_name in lookups:
            if entity == response_key:
                lookup = getattr(musicbrainzngs, lookup_name)
                response = lookup(mbid, includes=["tags"])
                break
        else:
            # Unsupported entity kind: nothing to query.
            return {"tag": []}

        for candidate in response.get(response_key, {}).get("tag-list", []):
            if not isinstance(candidate, dict):
                continue
            if "name" in candidate:
                names.append(candidate["name"])
    except Exception as exc:
        debug(f"MusicBrainz lookup failed: {exc}")

    return {"tag": names}
|
||||
|
||||
|
||||
def _clean_existing_tags(existing: Any) -> List[str]:
|
||||
tags: List[str] = []
|
||||
seen: Set[str] = set()
|
||||
@@ -601,7 +655,7 @@ def write_tags(
|
||||
|
||||
# Write via consolidated function
|
||||
try:
|
||||
lines = []
|
||||
lines: List[str] = []
|
||||
lines.extend(str(tag).strip().lower() for tag in tag_list if str(tag).strip())
|
||||
|
||||
if lines:
|
||||
@@ -2415,11 +2469,6 @@ def scrape_url_metadata(
|
||||
try:
|
||||
import json as json_module
|
||||
|
||||
try:
|
||||
from SYS.metadata import extract_ytdlp_tags
|
||||
except ImportError:
|
||||
extract_ytdlp_tags = None
|
||||
|
||||
# Build yt-dlp command with playlist support
|
||||
# IMPORTANT: Do NOT use --flat-playlist! It strips metadata like artist, album, uploader, genre
|
||||
# Without it, yt-dlp gives us full metadata in an 'entries' array within a single JSON object
|
||||
@@ -2462,14 +2511,13 @@ def scrape_url_metadata(
|
||||
# is_playlist = 'entries' in data and isinstance(data.get('entries'), list)
|
||||
|
||||
# Extract tags and playlist items
|
||||
tags = []
|
||||
playlist_items = []
|
||||
tags: List[str] = []
|
||||
playlist_items: List[Dict[str, Any]] = []
|
||||
|
||||
# IMPORTANT: Extract album/playlist-level tags FIRST (before processing entries)
|
||||
# This ensures we get metadata about the collection, not just individual tracks
|
||||
if extract_ytdlp_tags:
|
||||
album_tags = extract_ytdlp_tags(data)
|
||||
tags.extend(album_tags)
|
||||
album_tags = extract_ytdlp_tags(data)
|
||||
tags.extend(album_tags)
|
||||
|
||||
# Case 1: Entries are nested in the main object (standard playlist structure)
|
||||
if "entries" in data and isinstance(data.get("entries"), list):
|
||||
@@ -2493,41 +2541,40 @@ def scrape_url_metadata(
|
||||
|
||||
# Extract tags from each entry and merge (but don't duplicate album-level tags)
|
||||
# Only merge entry tags that are multi-value prefixes (not single-value like title:, artist:, etc.)
|
||||
if extract_ytdlp_tags:
|
||||
entry_tags = extract_ytdlp_tags(entry)
|
||||
entry_tags = extract_ytdlp_tags(entry)
|
||||
|
||||
# Single-value namespaces that should not be duplicated from entries
|
||||
single_value_namespaces = {
|
||||
"title",
|
||||
"artist",
|
||||
"album",
|
||||
"creator",
|
||||
"channel",
|
||||
"release_date",
|
||||
"upload_date",
|
||||
"license",
|
||||
"location",
|
||||
}
|
||||
# Single-value namespaces that should not be duplicated from entries
|
||||
single_value_namespaces = {
|
||||
"title",
|
||||
"artist",
|
||||
"album",
|
||||
"creator",
|
||||
"channel",
|
||||
"release_date",
|
||||
"upload_date",
|
||||
"license",
|
||||
"location",
|
||||
}
|
||||
|
||||
for tag in entry_tags:
|
||||
# Extract the namespace (part before the colon)
|
||||
tag_namespace = tag.split(":",
|
||||
1)[0].lower(
|
||||
) if ":" in tag else None
|
||||
for tag in entry_tags:
|
||||
# Extract the namespace (part before the colon)
|
||||
tag_namespace = tag.split(":",
|
||||
1)[0].lower(
|
||||
) if ":" in tag else None
|
||||
|
||||
# Skip if this namespace already exists in tags (from album level)
|
||||
if tag_namespace and tag_namespace in single_value_namespaces:
|
||||
# Check if any tag with this namespace already exists in tags
|
||||
already_has_namespace = any(
|
||||
t.split(":",
|
||||
1)[0].lower() == tag_namespace for t in tags
|
||||
if ":" in t
|
||||
)
|
||||
if already_has_namespace:
|
||||
continue # Skip this tag, keep the album-level one
|
||||
# Skip if this namespace already exists in tags (from album level)
|
||||
if tag_namespace and tag_namespace in single_value_namespaces:
|
||||
# Check if any tag with this namespace already exists in tags
|
||||
already_has_namespace = any(
|
||||
t.split(":",
|
||||
1)[0].lower() == tag_namespace for t in tags
|
||||
if ":" in t
|
||||
)
|
||||
if already_has_namespace:
|
||||
continue # Skip this tag, keep the album-level one
|
||||
|
||||
if tag not in tags: # Avoid exact duplicates
|
||||
tags.append(tag)
|
||||
if tag not in tags: # Avoid exact duplicates
|
||||
tags.append(tag)
|
||||
|
||||
# Case 2: Playlist detected by playlist_count field (BandCamp albums, etc.)
|
||||
# These need a separate call with --flat-playlist to get the actual entries
|
||||
@@ -2586,7 +2633,7 @@ def scrape_url_metadata(
|
||||
pass # Silently ignore if we can't get playlist entries
|
||||
|
||||
# Fallback: if still no tags detected, get from first item
|
||||
if not tags and extract_ytdlp_tags:
|
||||
if not tags:
|
||||
tags = extract_ytdlp_tags(data)
|
||||
|
||||
# Extract formats from the main data object
|
||||
@@ -2595,11 +2642,7 @@ def scrape_url_metadata(
|
||||
formats = extract_url_formats(data.get("formats", []))
|
||||
|
||||
# Deduplicate tags by namespace to prevent duplicate title:, artist:, etc.
|
||||
try:
|
||||
if dedup_tags_by_namespace:
|
||||
tags = dedup_tags_by_namespace(tags, keep_first=True)
|
||||
except Exception:
|
||||
pass # If dedup fails, return tags as-is
|
||||
tags = dedup_tags_by_namespace(tags, keep_first=True)
|
||||
|
||||
return title, tags, formats, playlist_items
|
||||
|
||||
@@ -2617,8 +2660,8 @@ def extract_url_formats(formats: list) -> List[Tuple[str, str]]:
|
||||
Returns list of (display_label, format_id) tuples.
|
||||
"""
|
||||
try:
|
||||
video_formats = {} # {resolution: format_data}
|
||||
audio_formats = {} # {quality_label: format_data}
|
||||
video_formats: Dict[str, Dict[str, Any]] = {} # {resolution: format_data}
|
||||
audio_formats: Dict[str, Dict[str, Any]] = {} # {quality_label: format_data}
|
||||
|
||||
for fmt in formats:
|
||||
vcodec = fmt.get("vcodec", "none")
|
||||
@@ -2655,7 +2698,7 @@ def extract_url_formats(formats: list) -> List[Tuple[str, str]]:
|
||||
"abr": abr,
|
||||
}
|
||||
|
||||
result = []
|
||||
result: List[Tuple[str, str]] = []
|
||||
|
||||
# Add video formats in descending resolution order
|
||||
for res in sorted(video_formats.keys(),
|
||||
@@ -2674,3 +2717,237 @@ def extract_url_formats(formats: list) -> List[Tuple[str, str]]:
|
||||
except Exception as e:
|
||||
log(f"Error extracting formats: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def prepare_ffmpeg_metadata(payload: Optional[dict[str, Any]]) -> dict[str, str]:
    """Derive a flat ``{field: text}`` metadata mapping from *payload*.

    Direct payload keys (``title``, ``artist``, ``album``, ``year``/``date``,
    ``comment``) are used first. ``payload["tags"]``, when it is a list, is
    then scanned: ``namespace:value`` tags can supply artist, album, genre,
    comment and date values that the payload did not provide, and tags
    without a usable ``namespace:value`` form are treated as genres.

    Every stored value goes through ``sanitize_metadata_value``, is skipped
    when empty, and is truncated to a per-field character limit.

    Returns:
        The collected mapping; ``{}`` when *payload* is not a dict.
    """
    if not isinstance(payload, dict):
        return {}

    fields: dict[str, str] = {}

    def set_field(key: str, raw: Any, limit: int = 2000) -> None:
        # Store the sanitized text under ``key`` unless it is empty,
        # keeping at most ``limit`` characters.
        text = sanitize_metadata_value(raw)
        if text:
            fields[key] = text[:limit]

    set_field("title", payload.get("title"))
    for direct_key in ("artist", "album"):
        set_field(direct_key, payload.get(direct_key), 512)
    explicit_date = payload.get("year") or payload.get("date")
    set_field("date", explicit_date, 20)

    comment = payload.get("comment")
    raw_tags = payload.get("tags")

    all_tags: list[str] = []
    artist_values: list[str] = []
    album_values: list[str] = []
    genre_values: list[str] = []

    for entry in raw_tags if isinstance(raw_tags, list) else ():
        if entry is None:
            continue
        tag = (entry if isinstance(entry, str) else str(entry)).strip()
        if not tag:
            continue
        all_tags.append(tag)

        prefix, colon, remainder = tag.partition(":")
        # NOTE(review): indentation is not visible in the reviewed source;
        # this fallback is assumed to pair with the "has namespace and
        # value" test rather than with the namespace chain below - confirm.
        if not (colon and remainder):
            genre_values.append(tag)
            continue

        ns = prefix.strip().lower()
        remainder = remainder.strip()
        if ns in {"artist", "creator", "author", "performer"}:
            artist_values.append(remainder)
        elif ns in {"album", "series", "collection", "group"}:
            album_values.append(remainder)
        elif ns in {"genre", "rating"}:
            genre_values.append(remainder)
        elif ns in {"comment", "description"} and not comment:
            comment = remainder
        elif ns in {"year", "date"} and not explicit_date:
            # Only used when the payload carried no year/date of its own.
            set_field("date", remainder, 20)

    if artist_values and "artist" not in fields:
        top_artists = unique_preserve_order(artist_values)[:3]
        set_field("artist", ", ".join(top_artists), 512)
    if album_values and "album" not in fields:
        set_field("album", unique_preserve_order(album_values)[0], 512)
    if genre_values:
        top_genres = unique_preserve_order(genre_values)[:5]
        set_field("genre", ", ".join(top_genres), 256)

    if all_tags:
        keywords = ", ".join(all_tags[:50])
        set_field("keywords", keywords, 2000)
        # With no comment from any other source, the tag list doubles as one.
        comment = comment or keywords

    if comment:
        comment_text = str(comment)
        for target in ("comment", "description"):
            set_field(target, comment_text, 2000)

    return fields
|
||||
|
||||
|
||||
def apply_mutagen_metadata(path: Path, metadata: dict[str, str], fmt: str) -> None:
    """Write *metadata* into the file at *path* through mutagen, best effort.

    Does nothing unless *fmt* is ``"audio"``, *metadata* is non-empty and the
    ``mutagen`` module is available. Load and save failures are logged to
    stderr; a failure to set an individual tag is skipped silently. The file
    is saved only when at least one tag was set.
    """
    if fmt != "audio" or not metadata or mutagen is None:
        return

    try:
        tagged_file = mutagen.File(path, easy=True)  # type: ignore[attr-defined]
    except Exception as exc:  # pragma: no cover - best effort only
        log(f"mutagen load failed: {exc}", file=sys.stderr)
        return
    if tagged_file is None:
        return

    # (metadata key, tag key) in write order. "description" is written to the
    # "comment" tag after "comment" itself, so it wins when both are present.
    key_pairs = (
        ("title", "title"),
        ("artist", "artist"),
        ("album", "album"),
        ("genre", "genre"),
        ("comment", "comment"),
        ("description", "comment"),
        ("date", "date"),
    )

    wrote_any = False
    for metadata_key, tag_key in key_pairs:
        text = metadata.get(metadata_key)
        if not text:
            continue
        try:
            tagged_file[tag_key] = [text]
        except Exception:  # pragma: no cover - best effort only
            continue
        wrote_any = True

    if wrote_any:
        try:
            tagged_file.save()
        except Exception as exc:  # pragma: no cover - best effort only
            log(f"mutagen save failed: {exc}", file=sys.stderr)
|
||||
|
||||
|
||||
def build_ffmpeg_command(
    ffmpeg_path: str,
    input_path: Path,
    output_path: Path,
    fmt: str,
    max_width: int,
    metadata: Optional[dict[str, str]] = None,
) -> list[str]:
    """Assemble the ffmpeg argument list for converting one file.

    Args:
        ffmpeg_path: Executable placed first in the command.
        input_path: Source file, passed after ``-i``.
        output_path: Destination file, appended last.
        fmt: Target format: ``mp4``, ``webm``, ``mp3``, ``flac``, ``wav``,
            ``aac``, ``m4a``, ``ogg``, ``opus``, ``audio`` (legacy name for
            the mp3 settings) or ``copy`` (no codec arguments are added).
        max_width: For ``mp4``/``webm`` only, a positive value adds a scale
            filter capped at this width.
        metadata: Optional ``key -> value`` pairs, each emitted as
            ``-metadata key=value``.

    Returns:
        The argument list, ready to hand to a process launcher.

    Raises:
        ValueError: If *fmt* is not one of the formats listed above.
    """
    # Audio-only targets: fmt -> (audio codec, bitrate or None, muxer name).
    audio_targets = {
        "mp3": ("libmp3lame", "192k", "mp3"),
        "flac": ("flac", None, "flac"),
        "wav": ("pcm_s16le", None, "wav"),
        "aac": ("aac", "192k", "adts"),
        "m4a": ("aac", "192k", "ipod"),
        "ogg": ("libvorbis", "192k", "ogg"),
        "opus": ("libopus", "192k", "opus"),
        "audio": ("libmp3lame", "192k", "mp3"),  # legacy format name for mp3
    }

    command = [ffmpeg_path, "-y", "-i", str(input_path)]

    if fmt in {"mp4", "webm"} and max_width and max_width > 0:
        command += ["-vf", f"scale='min({max_width},iw)':-2"]

    if metadata:
        for field, text in metadata.items():
            command += ["-metadata", f"{field}={text}"]

    if fmt == "mp4":
        command += [
            "-c:v", "libx265",
            "-preset", "medium",
            "-crf", "26",
            "-tag:v", "hvc1",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-movflags", "+faststart",
        ]
    elif fmt == "webm":
        command += [
            "-c:v", "libvpx-vp9",
            "-b:v", "0",
            "-crf", "32",
            "-c:a", "libopus",
            "-b:a", "160k",
            "-f", "webm",
        ]
    elif fmt in audio_targets:
        codec, bitrate, muxer = audio_targets[fmt]
        command += ["-vn", "-c:a", codec]
        if bitrate is not None:
            command += ["-b:a", bitrate]
        command += ["-f", muxer]
    elif fmt != "copy":
        raise ValueError(f"Unsupported format: {fmt}")

    command.append(str(output_path))
    return command
|
||||
|
||||
|
||||
@@ -633,7 +633,13 @@ class ProgressFileReader:
|
||||
min_interval_s: float = 0.25,
|
||||
):
|
||||
self._f = fileobj
|
||||
self._total = int(total_bytes) if total_bytes not in (None, 0, "") else 0
|
||||
if total_bytes is None:
|
||||
self._total = 0
|
||||
else:
|
||||
try:
|
||||
self._total = int(total_bytes)
|
||||
except Exception:
|
||||
self._total = 0
|
||||
self._label = str(label or "upload")
|
||||
self._min_interval_s = max(0.05, float(min_interval_s))
|
||||
self._bar = ProgressBar()
|
||||
|
||||
@@ -7,7 +7,7 @@ import sys
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from contextvars import ContextVar
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
from typing import Any, Dict, List, Optional, Sequence, Callable
|
||||
from SYS.models import PipelineStageContext
|
||||
from SYS.logger import log, debug, is_debug_enabled
|
||||
from SYS.worker import WorkerManagerRegistry, WorkerStages
|
||||
@@ -15,6 +15,9 @@ from SYS.cli_parsing import SelectionSyntax, SelectionFilterSyntax
|
||||
from SYS.rich_display import stdout_console
|
||||
from SYS.background_notifier import ensure_background_notifier
|
||||
from SYS.result_table import Table
|
||||
import re
|
||||
from datetime import datetime
|
||||
from SYS.cmdlet_catalog import import_cmd_module
|
||||
|
||||
HELP_EXAMPLE_SOURCE_COMMANDS = {
|
||||
".help-example",
|
||||
@@ -946,8 +949,9 @@ def get_last_result_table_source_command() -> Optional[str]:
|
||||
Command name (e.g., 'download-file') or None if not set
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.last_result_table) and hasattr(state.last_result_table, "source_command"):
|
||||
return state.last_result_table.source_command
|
||||
table = state.last_result_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "source_command"):
|
||||
return getattr(table, "source_command")
|
||||
return None
|
||||
|
||||
|
||||
@@ -958,8 +962,9 @@ def get_last_result_table_source_args() -> List[str]:
|
||||
List of arguments (e.g., ['https://example.com']) or empty list
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.last_result_table) and hasattr(state.last_result_table, "source_args"):
|
||||
return state.last_result_table.source_args or []
|
||||
table = state.last_result_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "source_args"):
|
||||
return getattr(table, "source_args") or []
|
||||
return []
|
||||
|
||||
|
||||
@@ -973,22 +978,26 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
|
||||
Selection arguments (e.g., ['-item', '3']) or None
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.last_result_table) and hasattr(state.last_result_table, "rows"):
|
||||
if 0 <= row_index < len(state.last_result_table.rows):
|
||||
row = state.last_result_table.rows[row_index]
|
||||
table = state.last_result_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "rows"):
|
||||
rows = table.rows
|
||||
if 0 <= row_index < len(rows):
|
||||
row = rows[row_index]
|
||||
if hasattr(row, "selection_args"):
|
||||
return row.selection_args
|
||||
return getattr(row, "selection_args")
|
||||
return None
|
||||
|
||||
|
||||
def get_last_result_table_row_selection_action(row_index: int) -> Optional[List[str]]:
|
||||
"""Get the expanded stage tokens for a row in the last result table."""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.last_result_table) and hasattr(state.last_result_table, "rows"):
|
||||
if 0 <= row_index < len(state.last_result_table.rows):
|
||||
row = state.last_result_table.rows[row_index]
|
||||
table = state.last_result_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "rows"):
|
||||
rows = table.rows
|
||||
if 0 <= row_index < len(rows):
|
||||
row = rows[row_index]
|
||||
if hasattr(row, "selection_action"):
|
||||
return row.selection_action
|
||||
return getattr(row, "selection_action")
|
||||
return None
|
||||
|
||||
def set_current_stage_table(result_table: Optional[Any]) -> None:
|
||||
@@ -1019,8 +1028,9 @@ def get_current_stage_table_source_command() -> Optional[str]:
|
||||
Command name (e.g., 'download-file') or None
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "source_command"):
|
||||
return state.current_stage_table.source_command
|
||||
table = state.current_stage_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "source_command"):
|
||||
return getattr(table, "source_command")
|
||||
return None
|
||||
|
||||
|
||||
@@ -1031,8 +1041,9 @@ def get_current_stage_table_source_args() -> List[str]:
|
||||
List of arguments or empty list
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "source_args"):
|
||||
return state.current_stage_table.source_args or []
|
||||
table = state.current_stage_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "source_args"):
|
||||
return getattr(table, "source_args") or []
|
||||
return []
|
||||
|
||||
|
||||
@@ -1046,22 +1057,26 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[
|
||||
Selection arguments or None
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "rows"):
|
||||
if 0 <= row_index < len(state.current_stage_table.rows):
|
||||
row = state.current_stage_table.rows[row_index]
|
||||
table = state.current_stage_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "rows"):
|
||||
rows = table.rows
|
||||
if 0 <= row_index < len(rows):
|
||||
row = rows[row_index]
|
||||
if hasattr(row, "selection_args"):
|
||||
return row.selection_args
|
||||
return getattr(row, "selection_args")
|
||||
return None
|
||||
|
||||
|
||||
def get_current_stage_table_row_selection_action(row_index: int) -> Optional[List[str]]:
|
||||
"""Get the expanded stage tokens for a row in the current stage table."""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "rows"):
|
||||
if 0 <= row_index < len(state.current_stage_table.rows):
|
||||
row = state.current_stage_table.rows[row_index]
|
||||
table = state.current_stage_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "rows"):
|
||||
rows = table.rows
|
||||
if 0 <= row_index < len(rows):
|
||||
row = rows[row_index]
|
||||
if hasattr(row, "selection_action"):
|
||||
return row.selection_action
|
||||
return getattr(row, "selection_action")
|
||||
return None
|
||||
|
||||
|
||||
@@ -1072,9 +1087,11 @@ def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]:
|
||||
back to the original item order (e.g., playlist or provider order).
|
||||
"""
|
||||
state = _get_pipeline_state()
|
||||
if _is_selectable_table(state.current_stage_table) and hasattr(state.current_stage_table, "rows"):
|
||||
if 0 <= row_index < len(state.current_stage_table.rows):
|
||||
row = state.current_stage_table.rows[row_index]
|
||||
table = state.current_stage_table
|
||||
if table is not None and _is_selectable_table(table) and hasattr(table, "rows"):
|
||||
rows = table.rows
|
||||
if 0 <= row_index < len(rows):
|
||||
row = rows[row_index]
|
||||
return getattr(row, "source_index", None)
|
||||
return None
|
||||
|
||||
|
||||
@@ -33,12 +33,15 @@ except ImportError:
|
||||
TEXTUAL_AVAILABLE = False
|
||||
|
||||
|
||||
# Import ResultModel from the API for unification
|
||||
try:
|
||||
from SYS.result_table_api import ResultModel
|
||||
except ImportError:
|
||||
# Fallback if not available yet in directory structure (unlikely)
|
||||
ResultModel = None
|
||||
# Import ResultModel from the API for typing; avoid runtime redefinition issues
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from SYS.result_table_api import ResultModel # type: ignore
|
||||
else:
|
||||
ResultModel = None # type: ignore[assignment]
|
||||
|
||||
# Reuse the existing format_bytes helper under a clearer alias
|
||||
from SYS.utils import format_bytes as format_mb
|
||||
|
||||
|
||||
def _sanitize_cell_text(value: Any) -> str:
|
||||
@@ -158,6 +161,8 @@ def extract_hash_value(item: Any) -> str:
|
||||
|
||||
def extract_title_value(item: Any) -> str:
|
||||
data = _as_dict(item) or {}
|
||||
if not isinstance(data, dict):
|
||||
data = {}
|
||||
title = _get_first_dict_value(data, ["title", "name", "filename"])
|
||||
if not title:
|
||||
title = _get_first_dict_value(
|
||||
@@ -171,9 +176,11 @@ def extract_title_value(item: Any) -> str:
|
||||
|
||||
def extract_ext_value(item: Any) -> str:
|
||||
data = _as_dict(item) or {}
|
||||
if not isinstance(data, dict):
|
||||
data = {}
|
||||
|
||||
meta = data.get("metadata") if isinstance(data.get("metadata"),
|
||||
dict) else {}
|
||||
_md = data.get("metadata")
|
||||
meta: Dict[str, Any] = _md if isinstance(_md, dict) else {}
|
||||
raw_path = data.get("path") or data.get("target") or data.get(
|
||||
"filename"
|
||||
) or data.get("title")
|
||||
@@ -206,8 +213,10 @@ def extract_ext_value(item: Any) -> str:
|
||||
|
||||
def extract_size_bytes_value(item: Any) -> Optional[int]:
|
||||
data = _as_dict(item) or {}
|
||||
meta = data.get("metadata") if isinstance(data.get("metadata"),
|
||||
dict) else {}
|
||||
if not isinstance(data, dict):
|
||||
data = {}
|
||||
_md = data.get("metadata")
|
||||
meta: Dict[str, Any] = _md if isinstance(_md, dict) else {}
|
||||
|
||||
size_val = _get_first_dict_value(
|
||||
data,
|
||||
@@ -749,7 +758,7 @@ class Table:
|
||||
row.payload = result
|
||||
|
||||
# Handle ResultModel from the new strict API (SYS/result_table_api.py)
|
||||
if ResultModel and isinstance(result, ResultModel):
|
||||
if ResultModel is not None and isinstance(result, ResultModel):
|
||||
self._add_result_model(row, result)
|
||||
# Handle TagItem from get_tag.py (tag display with index)
|
||||
elif hasattr(result, "__class__") and result.__class__.__name__ == "TagItem":
|
||||
@@ -1573,7 +1582,7 @@ class Table:
|
||||
return None
|
||||
|
||||
# Remaining parts are cmdlet arguments
|
||||
cmdlet_args = {}
|
||||
cmdlet_args: dict[str, Any] = {}
|
||||
i = 1
|
||||
while i < len(parts):
|
||||
part = parts[i]
|
||||
@@ -1906,7 +1915,7 @@ def extract_item_metadata(item: Any) -> Dict[str, Any]:
|
||||
out = {}
|
||||
|
||||
# Handle ResultModel specifically for better detail display
|
||||
if ResultModel and isinstance(item, ResultModel):
|
||||
if ResultModel is not None and isinstance(item, ResultModel):
|
||||
if item.title: out["Title"] = item.title
|
||||
if item.path: out["Path"] = item.path
|
||||
if item.ext: out["Ext"] = item.ext
|
||||
@@ -1964,34 +1973,30 @@ def extract_item_metadata(item: Any) -> Dict[str, Any]:
|
||||
if e: out["Ext"] = e
|
||||
|
||||
size = extract_size_bytes_value(item)
|
||||
if size:
|
||||
out["Size"] = size
|
||||
if size is not None:
|
||||
out["Size"] = format_mb(size)
|
||||
else:
|
||||
s = data.get("size") or data.get("size_bytes")
|
||||
if s: out["Size"] = s
|
||||
|
||||
if s is not None:
|
||||
out["Size"] = str(s)
|
||||
|
||||
# Duration
|
||||
dur = _get_first_dict_value(data, ["duration_seconds", "duration"])
|
||||
if dur:
|
||||
out["Duration"] = _format_duration_hms(dur)
|
||||
|
||||
|
||||
# URL
|
||||
url = _get_first_dict_value(data, ["url", "URL"])
|
||||
if url:
|
||||
out["Url"] = url
|
||||
else:
|
||||
out["Url"] = None # Explicitly None for <null> display
|
||||
out["Url"] = str(url) if url else ""
|
||||
|
||||
# Relationships
|
||||
rels = _get_first_dict_value(data, ["relationships", "rel"])
|
||||
if rels:
|
||||
out["Relations"] = rels
|
||||
else:
|
||||
out["Relations"] = None
|
||||
out["Relations"] = str(rels) if rels else ""
|
||||
|
||||
# Tags Summary
|
||||
tags = _get_first_dict_value(data, ["tags", "tag"])
|
||||
if tags: out["Tags"] = tags
|
||||
if tags:
|
||||
out["Tags"] = ", ".join([str(t) for t in (tags if isinstance(tags, (list, tuple)) else [tags])])
|
||||
|
||||
return out
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import sys
|
||||
from typing import Any, Iterator, TextIO
|
||||
from typing import Any, Iterator, TextIO, List, Dict, Optional, Tuple, cast
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
@@ -200,8 +200,8 @@ def render_image_to_console(image_path: str | Path, max_width: int | None = None
|
||||
if not path.exists() or not path.is_file():
|
||||
return
|
||||
|
||||
with Image.open(path) as img:
|
||||
img = img.convert("RGB")
|
||||
with Image.open(path) as opened_img:
|
||||
img = opened_img.convert("RGB")
|
||||
orig_w, orig_h = img.size
|
||||
|
||||
# Determine target dimensions
|
||||
@@ -235,14 +235,21 @@ def render_image_to_console(image_path: str | Path, max_width: int | None = None
|
||||
|
||||
img = img.resize((target_w, target_h), Image.Resampling.BILINEAR)
|
||||
pixels = img.load()
|
||||
if pixels is None:
|
||||
return
|
||||
|
||||
# Render using upper half block (U+2580)
|
||||
# Each character row in terminal represents 2 pixel rows in image.
|
||||
for y in range(0, target_h - 1, 2):
|
||||
line = Text()
|
||||
for x in range(target_w):
|
||||
r1, g1, b1 = pixels[x, y]
|
||||
r2, g2, b2 = pixels[x, y + 1]
|
||||
rgb1 = cast(tuple, pixels[x, y])
|
||||
rgb2 = cast(tuple, pixels[x, y + 1])
|
||||
try:
|
||||
r1, g1, b1 = int(rgb1[0]), int(rgb1[1]), int(rgb1[2])
|
||||
r2, g2, b2 = int(rgb2[0]), int(rgb2[1]), int(rgb2[2])
|
||||
except Exception:
|
||||
r1 = g1 = b1 = r2 = g2 = b2 = 0
|
||||
# Foreground is top pixel, background is bottom pixel
|
||||
line.append(
|
||||
"▀",
|
||||
|
||||
@@ -21,7 +21,7 @@ from dataclasses import dataclass, field
|
||||
from fnmatch import fnmatch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import SYS.utils_constant
|
||||
from SYS.utils_constant import mime_maps
|
||||
|
||||
try:
|
||||
import cbor2
|
||||
@@ -140,7 +140,7 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
|
||||
metadata["hash"] = sha256_file(file_path)
|
||||
metadata["size"] = Path(file_path).stat().st_size
|
||||
format_found = False
|
||||
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
|
||||
for mime_type, ext_map in mime_maps.items():
|
||||
for key, info in ext_map.items():
|
||||
if info.get("ext") == file_ext:
|
||||
metadata["type"] = mime_type
|
||||
@@ -516,7 +516,7 @@ def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | No
|
||||
"""
|
||||
try:
|
||||
parts = key_path.split(".")
|
||||
value = config
|
||||
value: Any = config
|
||||
for part in parts:
|
||||
if isinstance(value, dict):
|
||||
value = value.get(part)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
mime_maps = {
|
||||
from typing import Any, Dict
|
||||
|
||||
mime_maps: Dict[str, Dict[str, Dict[str, Any]]] = {
|
||||
"image": {
|
||||
"jpg": {
|
||||
"ext": ".jpg",
|
||||
|
||||
@@ -5,7 +5,7 @@ import io
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set, TextIO
|
||||
from typing import Any, Dict, Optional, Set, TextIO, Sequence
|
||||
|
||||
from SYS.config import get_local_storage_path
|
||||
from SYS.worker_manager import WorkerManager
|
||||
@@ -189,9 +189,7 @@ class WorkerManagerRegistry:
|
||||
manager.expire_running_workers(
|
||||
older_than_seconds=120,
|
||||
worker_id_prefix="cli_%",
|
||||
reason=(
|
||||
"CLI session ended unexpectedly; marking worker as failed",
|
||||
),
|
||||
reason="CLI session ended unexpectedly; marking worker as failed",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -47,8 +47,8 @@ class Worker:
|
||||
self.details = ""
|
||||
self.error_message = ""
|
||||
self.result = "pending"
|
||||
self._stdout_buffer = []
|
||||
self._steps_buffer = []
|
||||
self._stdout_buffer: list[str] = []
|
||||
self._steps_buffer: list[str] = []
|
||||
|
||||
def log_step(self, step_text: str) -> None:
|
||||
"""Log a step for this worker.
|
||||
@@ -108,18 +108,26 @@ class Worker:
|
||||
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||||
def update_progress(self, progress: float | str = 0.0, details: str = "") -> None:
|
||||
"""Update worker progress.
|
||||
|
||||
Args:
|
||||
progress: Progress string (e.g., "50%")
|
||||
progress: Progress value (float) or textual like "50%"; will be coerced to float
|
||||
details: Additional details
|
||||
"""
|
||||
self.progress = progress
|
||||
self.progress = str(progress)
|
||||
self.details = details
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.update_worker(self.id, progress, details)
|
||||
# Normalize to a float value for the manager API (0-100)
|
||||
try:
|
||||
if isinstance(progress, str) and progress.endswith('%'):
|
||||
progress_value = float(progress.rstrip('%'))
|
||||
else:
|
||||
progress_value = float(progress)
|
||||
except Exception:
|
||||
progress_value = 0.0
|
||||
self.manager.update_worker(self.id, progress_value, details)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating worker {self.id}: {e}")
|
||||
|
||||
@@ -165,7 +173,7 @@ class WorkerLoggingHandler(logging.StreamHandler):
|
||||
self.db = db
|
||||
self.manager = manager
|
||||
self.buffer_size = buffer_size
|
||||
self.buffer = []
|
||||
self.buffer: list[str] = []
|
||||
self._lock = Lock()
|
||||
|
||||
# Set a format that includes timestamp and level
|
||||
@@ -278,14 +286,6 @@ class WorkerManager:
|
||||
self._stdout_flush_bytes = 4096
|
||||
self._stdout_flush_interval = 0.75
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the database connection."""
|
||||
if self.db:
|
||||
try:
|
||||
with self._db_lock:
|
||||
self.db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
@@ -478,7 +478,7 @@ class WorkerManager:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
kwargs = {}
|
||||
kwargs: dict[str, Any] = {}
|
||||
if progress > 0:
|
||||
kwargs["progress"] = progress
|
||||
if current_step:
|
||||
|
||||
Reference in New Issue
Block a user