This commit is contained in:
nose
2025-12-11 12:47:30 -08:00
parent 6b05dc5552
commit 65d12411a2
92 changed files with 17447 additions and 14308 deletions

225
models.py
View File

@@ -16,134 +16,183 @@ from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple
class PipeObject:
"""Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline.
This is the single source of truth for all result data in the pipeline. It can represent:
- Tag extraction results (IMDb, MusicBrainz, OpenLibrary lookups)
- Remote metadata fetches
- File operations with metadata/tags and relationship tracking
- Search results
- Files with version relationships (king/alt/related)
This is the single source of truth for all result data in the pipeline. Uses the hash+store
canonical pattern for file identification.
Attributes:
source: Source of the object (e.g., 'imdb', 'musicbrainz', 'libgen', 'debrid', 'file', etc.)
identifier: Unique identifier from the source (e.g., IMDb ID, MBID, magnet hash, file hash)
hash: SHA-256 hash of the file (canonical identifier)
store: Storage backend name (e.g., 'default', 'hydrus', 'test', 'home')
tags: List of extracted or assigned tags
title: Human-readable title if applicable
source_url: URL where the object came from
duration: Duration in seconds if applicable
metadata: Full metadata dictionary from source
remote_metadata: Additional remote metadata
warnings: Any warnings or issues encountered
mpv_metadata: MPV-specific metadata if applicable
file_path: Path to the file if this object represents a file
file_hash: SHA-256 hash of the file for integrity and relationship tracking
king_hash: Hash of the primary/master version of this file (for alternates)
alt_hashes: List of hashes for alternate versions of this file
related_hashes: List of hashes for related files (e.g., screenshots, editions)
path: Path to the file if this object represents a file
relationships: Relationship data (king/alt/related hashes)
is_temp: If True, this is a temporary/intermediate artifact that may be cleaned up
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name', e.g., 'cmdlet:get-file')
parent_id: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name')
parent_hash: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
extra: Additional fields not covered above
"""
source: str
identifier: str
hash: str
store: str
tags: List[str] = field(default_factory=list)
title: Optional[str] = None
url: Optional[str] = None
source_url: Optional[str] = None
duration: Optional[float] = None
metadata: Dict[str, Any] = field(default_factory=dict)
remote_metadata: Optional[Dict[str, Any]] = None
warnings: List[str] = field(default_factory=list)
mpv_metadata: Optional[Dict[str, Any]] = None
file_path: Optional[str] = None
file_hash: Optional[str] = None
king_hash: Optional[str] = None
alt_hashes: List[str] = field(default_factory=list)
related_hashes: List[str] = field(default_factory=list)
path: Optional[str] = None
relationships: Dict[str, Any] = field(default_factory=dict)
is_temp: bool = False
action: Optional[str] = None
parent_id: Optional[str] = None
parent_hash: Optional[str] = None
extra: Dict[str, Any] = field(default_factory=dict)
def register_as_king(self, file_hash: str) -> None:
"""Register this object as the king (primary) version of a file."""
self.king_hash = file_hash
def add_alternate(self, alt_hash: str) -> None:
"""Add an alternate version hash for this file."""
if alt_hash not in self.alt_hashes:
self.alt_hashes.append(alt_hash)
def add_related(self, related_hash: str) -> None:
"""Add a related file hash (e.g., screenshot, edition)."""
if related_hash not in self.related_hashes:
self.related_hashes.append(related_hash)
def add_relationship(self, rel_type: str, rel_hash: str) -> None:
"""Add a relationship hash.
Args:
rel_type: Relationship type ('king', 'alt', 'related')
rel_hash: Hash to add to the relationship
"""
if rel_type not in self.relationships:
self.relationships[rel_type] = []
if isinstance(self.relationships[rel_type], list):
if rel_hash not in self.relationships[rel_type]:
self.relationships[rel_type].append(rel_hash)
else:
# Single value (e.g., king), convert to that value
self.relationships[rel_type] = rel_hash
def get_relationships(self) -> Dict[str, Any]:
"""Get all relationships for this object."""
rels = {}
if self.king_hash:
rels["king"] = self.king_hash
if self.alt_hashes:
rels["alt"] = self.alt_hashes
if self.related_hashes:
rels["related"] = self.related_hashes
return rels
return self.relationships.copy() if self.relationships else {}
def debug_table(self) -> None:
"""Print a formatted debug table showing PipeObject state.
Only prints when debug logging is enabled. Useful for tracking
object state throughout the pipeline.
"""
try:
from helper.logger import is_debug_enabled, debug
if not is_debug_enabled():
return
except Exception:
return
# Prepare display values
hash_display = self.hash or "N/A"
store_display = self.store or "N/A"
title_display = self.title or "N/A"
tags_display = ", ".join(self.tags[:3]) if self.tags else "[]"
if len(self.tags) > 3:
tags_display += f" (+{len(self.tags) - 3} more)"
file_path_display = self.path or "N/A"
if file_path_display != "N/A" and len(file_path_display) > 50:
file_path_display = "..." + file_path_display[-47:]
url_display = self.url or "N/A"
if url_display != "N/A" and len(url_display) > 48:
url_display = url_display[:45] + "..."
relationships_display = "N/A"
if self.relationships:
rel_parts = []
for key, val in self.relationships.items():
if isinstance(val, list):
rel_parts.append(f"{key}({len(val)})")
else:
rel_parts.append(key)
relationships_display = ", ".join(rel_parts)
warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none"
# Print table
debug("┌─────────────────────────────────────────────────────────────┐")
debug("│ PipeObject Debug Info │")
debug("├─────────────────────────────────────────────────────────────┤")
debug(f"│ Hash : {hash_display:<48}")
debug(f"│ Store : {store_display:<48}")
debug(f"│ Title : {title_display:<48}")
debug(f"│ Tags : {tags_display:<48}")
debug(f"│ URL : {url_display:<48}")
debug(f"│ File Path : {file_path_display:<48}")
debug(f"│ Relationships: {relationships_display:<47}")
debug(f"│ Warnings : {warnings_display:<48}")
# Show extra keys as individual rows
if self.extra:
debug("├─────────────────────────────────────────────────────────────┤")
debug("│ Extra Fields: │")
for key, val in self.extra.items():
# Format value for display
if isinstance(val, (list, set)):
val_display = f"{type(val).__name__}({len(val)})"
elif isinstance(val, dict):
val_display = f"dict({len(val)})"
elif isinstance(val, (int, float)):
val_display = str(val)
else:
val_str = str(val)
val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..."
# Truncate key if needed
key_display = key if len(key) <= 15 else key[:12] + "..."
debug(f"{key_display:<15}: {val_display:<42}")
if self.action:
debug("├─────────────────────────────────────────────────────────────┤")
action_display = self.action[:48]
debug(f"│ Action : {action_display:<48}")
if self.parent_hash:
if not self.action:
debug("├─────────────────────────────────────────────────────────────┤")
parent_display = self.parent_hash[:12] + "..." if len(self.parent_hash) > 12 else self.parent_hash
debug(f"│ Parent Hash : {parent_display:<48}")
debug("└─────────────────────────────────────────────────────────────┘")
def to_dict(self) -> Dict[str, Any]:
"""Serialize to dictionary, excluding None and empty values."""
data: Dict[str, Any] = {
"source": self.source,
"tags": self.tags,
"hash": self.hash,
"store": self.store,
}
if self.identifier:
data["id"] = self.identifier
if self.tags:
data["tags"] = self.tags
if self.title:
data["title"] = self.title
if self.url:
data["url"] = self.url
if self.source_url:
data["source_url"] = self.source_url
if self.duration is not None:
data["duration"] = self.duration
if self.metadata:
data["metadata"] = self.metadata
if self.remote_metadata is not None:
data["remote_metadata"] = self.remote_metadata
if self.mpv_metadata is not None:
data["mpv_metadata"] = self.mpv_metadata
if self.warnings:
data["warnings"] = self.warnings
if self.file_path:
data["file_path"] = self.file_path
if self.file_hash:
data["file_hash"] = self.file_hash
# Include pipeline chain tracking fields
if self.path:
data["path"] = self.path
if self.relationships:
data["relationships"] = self.relationships
if self.is_temp:
data["is_temp"] = self.is_temp
if self.action:
data["action"] = self.action
if self.parent_id:
data["parent_id"] = self.parent_id
# Include relationship data if present
rels = self.get_relationships()
if rels:
data["relationships"] = rels
if self.parent_hash:
data["parent_hash"] = self.parent_hash
# Add extra fields
data.update({k: v for k, v in self.extra.items() if v is not None})
return data
@property
def hash(self) -> str:
"""Compute SHA-256 hash from source and identifier."""
base = f"{self.source}:{self.identifier}"
return hashlib.sha256(base.encode('utf-8')).hexdigest()
# Backwards compatibility aliases
def as_dict(self) -> Dict[str, Any]:
"""Alias for to_dict() for backwards compatibility."""
return self.to_dict()
def to_serializable(self) -> Dict[str, Any]:
"""Alias for to_dict() for backwards compatibility."""
return self.to_dict()
class FileRelationshipTracker:
"""Track relationships between files for sidecar creation.
@@ -235,6 +284,7 @@ class DownloadOptions:
clip_sections: Optional[str] = None
playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8")
no_playlist: bool = False # If True, pass --no-playlist to yt-dlp
quiet: bool = False # If True, suppress all console output (progress, debug logs)
class SendFunc(Protocol):
@@ -546,18 +596,25 @@ class ProgressBar:
class PipelineStageContext:
"""Context information for the current pipeline stage."""
def __init__(self, stage_index: int, total_stages: int):
def __init__(self, stage_index: int, total_stages: int, worker_id: Optional[str] = None):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.worker_id = worker_id
self.emits: List[Any] = []
def emit(self, obj: Any) -> None:
"""Emit an object to the next pipeline stage."""
self.emits.append(obj)
def get_current_command_text(self) -> str:
"""Get the current command text (for backward compatibility)."""
# This is maintained for backward compatibility with old code
# In a real implementation, this would come from the stage context
return ""
def __repr__(self) -> str:
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage})"
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage}, worker_id={self.worker_id})"
# ============================================================================