dfdkflj
This commit is contained in:
225
models.py
225
models.py
@@ -16,134 +16,183 @@ from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple
|
||||
class PipeObject:
|
||||
"""Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline.
|
||||
|
||||
This is the single source of truth for all result data in the pipeline. It can represent:
|
||||
- Tag extraction results (IMDb, MusicBrainz, OpenLibrary lookups)
|
||||
- Remote metadata fetches
|
||||
- File operations with metadata/tags and relationship tracking
|
||||
- Search results
|
||||
- Files with version relationships (king/alt/related)
|
||||
This is the single source of truth for all result data in the pipeline. Uses the hash+store
|
||||
canonical pattern for file identification.
|
||||
|
||||
Attributes:
|
||||
source: Source of the object (e.g., 'imdb', 'musicbrainz', 'libgen', 'debrid', 'file', etc.)
|
||||
identifier: Unique identifier from the source (e.g., IMDb ID, MBID, magnet hash, file hash)
|
||||
hash: SHA-256 hash of the file (canonical identifier)
|
||||
store: Storage backend name (e.g., 'default', 'hydrus', 'test', 'home')
|
||||
tags: List of extracted or assigned tags
|
||||
title: Human-readable title if applicable
|
||||
source_url: URL where the object came from
|
||||
duration: Duration in seconds if applicable
|
||||
metadata: Full metadata dictionary from source
|
||||
remote_metadata: Additional remote metadata
|
||||
warnings: Any warnings or issues encountered
|
||||
mpv_metadata: MPV-specific metadata if applicable
|
||||
file_path: Path to the file if this object represents a file
|
||||
file_hash: SHA-256 hash of the file for integrity and relationship tracking
|
||||
king_hash: Hash of the primary/master version of this file (for alternates)
|
||||
alt_hashes: List of hashes for alternate versions of this file
|
||||
related_hashes: List of hashes for related files (e.g., screenshots, editions)
|
||||
path: Path to the file if this object represents a file
|
||||
relationships: Relationship data (king/alt/related hashes)
|
||||
is_temp: If True, this is a temporary/intermediate artifact that may be cleaned up
|
||||
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name', e.g., 'cmdlet:get-file')
|
||||
parent_id: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
|
||||
action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name')
|
||||
parent_hash: Hash of the parent file in the pipeline chain (for tracking provenance/lineage)
|
||||
extra: Additional fields not covered above
|
||||
"""
|
||||
source: str
|
||||
identifier: str
|
||||
hash: str
|
||||
store: str
|
||||
tags: List[str] = field(default_factory=list)
|
||||
title: Optional[str] = None
|
||||
url: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
duration: Optional[float] = None
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
remote_metadata: Optional[Dict[str, Any]] = None
|
||||
warnings: List[str] = field(default_factory=list)
|
||||
mpv_metadata: Optional[Dict[str, Any]] = None
|
||||
file_path: Optional[str] = None
|
||||
file_hash: Optional[str] = None
|
||||
king_hash: Optional[str] = None
|
||||
alt_hashes: List[str] = field(default_factory=list)
|
||||
related_hashes: List[str] = field(default_factory=list)
|
||||
path: Optional[str] = None
|
||||
relationships: Dict[str, Any] = field(default_factory=dict)
|
||||
is_temp: bool = False
|
||||
action: Optional[str] = None
|
||||
parent_id: Optional[str] = None
|
||||
parent_hash: Optional[str] = None
|
||||
extra: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def register_as_king(self, file_hash: str) -> None:
|
||||
"""Register this object as the king (primary) version of a file."""
|
||||
self.king_hash = file_hash
|
||||
|
||||
def add_alternate(self, alt_hash: str) -> None:
|
||||
"""Add an alternate version hash for this file."""
|
||||
if alt_hash not in self.alt_hashes:
|
||||
self.alt_hashes.append(alt_hash)
|
||||
|
||||
def add_related(self, related_hash: str) -> None:
|
||||
"""Add a related file hash (e.g., screenshot, edition)."""
|
||||
if related_hash not in self.related_hashes:
|
||||
self.related_hashes.append(related_hash)
|
||||
def add_relationship(self, rel_type: str, rel_hash: str) -> None:
|
||||
"""Add a relationship hash.
|
||||
|
||||
Args:
|
||||
rel_type: Relationship type ('king', 'alt', 'related')
|
||||
rel_hash: Hash to add to the relationship
|
||||
"""
|
||||
if rel_type not in self.relationships:
|
||||
self.relationships[rel_type] = []
|
||||
|
||||
if isinstance(self.relationships[rel_type], list):
|
||||
if rel_hash not in self.relationships[rel_type]:
|
||||
self.relationships[rel_type].append(rel_hash)
|
||||
else:
|
||||
# Single value (e.g., king), convert to that value
|
||||
self.relationships[rel_type] = rel_hash
|
||||
|
||||
def get_relationships(self) -> Dict[str, Any]:
|
||||
"""Get all relationships for this object."""
|
||||
rels = {}
|
||||
if self.king_hash:
|
||||
rels["king"] = self.king_hash
|
||||
if self.alt_hashes:
|
||||
rels["alt"] = self.alt_hashes
|
||||
if self.related_hashes:
|
||||
rels["related"] = self.related_hashes
|
||||
return rels
|
||||
return self.relationships.copy() if self.relationships else {}
|
||||
|
||||
def debug_table(self) -> None:
|
||||
"""Print a formatted debug table showing PipeObject state.
|
||||
|
||||
Only prints when debug logging is enabled. Useful for tracking
|
||||
object state throughout the pipeline.
|
||||
"""
|
||||
try:
|
||||
from helper.logger import is_debug_enabled, debug
|
||||
|
||||
if not is_debug_enabled():
|
||||
return
|
||||
except Exception:
|
||||
return
|
||||
|
||||
# Prepare display values
|
||||
hash_display = self.hash or "N/A"
|
||||
store_display = self.store or "N/A"
|
||||
title_display = self.title or "N/A"
|
||||
tags_display = ", ".join(self.tags[:3]) if self.tags else "[]"
|
||||
if len(self.tags) > 3:
|
||||
tags_display += f" (+{len(self.tags) - 3} more)"
|
||||
file_path_display = self.path or "N/A"
|
||||
if file_path_display != "N/A" and len(file_path_display) > 50:
|
||||
file_path_display = "..." + file_path_display[-47:]
|
||||
|
||||
url_display = self.url or "N/A"
|
||||
if url_display != "N/A" and len(url_display) > 48:
|
||||
url_display = url_display[:45] + "..."
|
||||
|
||||
relationships_display = "N/A"
|
||||
if self.relationships:
|
||||
rel_parts = []
|
||||
for key, val in self.relationships.items():
|
||||
if isinstance(val, list):
|
||||
rel_parts.append(f"{key}({len(val)})")
|
||||
else:
|
||||
rel_parts.append(key)
|
||||
relationships_display = ", ".join(rel_parts)
|
||||
|
||||
warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none"
|
||||
|
||||
# Print table
|
||||
debug("┌─────────────────────────────────────────────────────────────┐")
|
||||
debug("│ PipeObject Debug Info │")
|
||||
debug("├─────────────────────────────────────────────────────────────┤")
|
||||
debug(f"│ Hash : {hash_display:<48}│")
|
||||
debug(f"│ Store : {store_display:<48}│")
|
||||
debug(f"│ Title : {title_display:<48}│")
|
||||
debug(f"│ Tags : {tags_display:<48}│")
|
||||
debug(f"│ URL : {url_display:<48}│")
|
||||
debug(f"│ File Path : {file_path_display:<48}│")
|
||||
debug(f"│ Relationships: {relationships_display:<47}│")
|
||||
debug(f"│ Warnings : {warnings_display:<48}│")
|
||||
|
||||
# Show extra keys as individual rows
|
||||
if self.extra:
|
||||
debug("├─────────────────────────────────────────────────────────────┤")
|
||||
debug("│ Extra Fields: │")
|
||||
for key, val in self.extra.items():
|
||||
# Format value for display
|
||||
if isinstance(val, (list, set)):
|
||||
val_display = f"{type(val).__name__}({len(val)})"
|
||||
elif isinstance(val, dict):
|
||||
val_display = f"dict({len(val)})"
|
||||
elif isinstance(val, (int, float)):
|
||||
val_display = str(val)
|
||||
else:
|
||||
val_str = str(val)
|
||||
val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..."
|
||||
|
||||
# Truncate key if needed
|
||||
key_display = key if len(key) <= 15 else key[:12] + "..."
|
||||
debug(f"│ {key_display:<15}: {val_display:<42}│")
|
||||
|
||||
if self.action:
|
||||
debug("├─────────────────────────────────────────────────────────────┤")
|
||||
action_display = self.action[:48]
|
||||
debug(f"│ Action : {action_display:<48}│")
|
||||
if self.parent_hash:
|
||||
if not self.action:
|
||||
debug("├─────────────────────────────────────────────────────────────┤")
|
||||
parent_display = self.parent_hash[:12] + "..." if len(self.parent_hash) > 12 else self.parent_hash
|
||||
debug(f"│ Parent Hash : {parent_display:<48}│")
|
||||
debug("└─────────────────────────────────────────────────────────────┘")
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Serialize to dictionary, excluding None and empty values."""
|
||||
data: Dict[str, Any] = {
|
||||
"source": self.source,
|
||||
"tags": self.tags,
|
||||
"hash": self.hash,
|
||||
"store": self.store,
|
||||
}
|
||||
if self.identifier:
|
||||
data["id"] = self.identifier
|
||||
|
||||
if self.tags:
|
||||
data["tags"] = self.tags
|
||||
if self.title:
|
||||
data["title"] = self.title
|
||||
if self.url:
|
||||
data["url"] = self.url
|
||||
if self.source_url:
|
||||
data["source_url"] = self.source_url
|
||||
if self.duration is not None:
|
||||
data["duration"] = self.duration
|
||||
if self.metadata:
|
||||
data["metadata"] = self.metadata
|
||||
if self.remote_metadata is not None:
|
||||
data["remote_metadata"] = self.remote_metadata
|
||||
if self.mpv_metadata is not None:
|
||||
data["mpv_metadata"] = self.mpv_metadata
|
||||
if self.warnings:
|
||||
data["warnings"] = self.warnings
|
||||
if self.file_path:
|
||||
data["file_path"] = self.file_path
|
||||
if self.file_hash:
|
||||
data["file_hash"] = self.file_hash
|
||||
# Include pipeline chain tracking fields
|
||||
if self.path:
|
||||
data["path"] = self.path
|
||||
if self.relationships:
|
||||
data["relationships"] = self.relationships
|
||||
if self.is_temp:
|
||||
data["is_temp"] = self.is_temp
|
||||
if self.action:
|
||||
data["action"] = self.action
|
||||
if self.parent_id:
|
||||
data["parent_id"] = self.parent_id
|
||||
# Include relationship data if present
|
||||
rels = self.get_relationships()
|
||||
if rels:
|
||||
data["relationships"] = rels
|
||||
if self.parent_hash:
|
||||
data["parent_hash"] = self.parent_hash
|
||||
|
||||
# Add extra fields
|
||||
data.update({k: v for k, v in self.extra.items() if v is not None})
|
||||
return data
|
||||
|
||||
@property
|
||||
def hash(self) -> str:
|
||||
"""Compute SHA-256 hash from source and identifier."""
|
||||
base = f"{self.source}:{self.identifier}"
|
||||
return hashlib.sha256(base.encode('utf-8')).hexdigest()
|
||||
|
||||
# Backwards compatibility aliases
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
"""Alias for to_dict() for backwards compatibility."""
|
||||
return self.to_dict()
|
||||
|
||||
def to_serializable(self) -> Dict[str, Any]:
|
||||
"""Alias for to_dict() for backwards compatibility."""
|
||||
return self.to_dict()
|
||||
|
||||
|
||||
class FileRelationshipTracker:
|
||||
"""Track relationships between files for sidecar creation.
|
||||
@@ -235,6 +284,7 @@ class DownloadOptions:
|
||||
clip_sections: Optional[str] = None
|
||||
playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8")
|
||||
no_playlist: bool = False # If True, pass --no-playlist to yt-dlp
|
||||
quiet: bool = False # If True, suppress all console output (progress, debug logs)
|
||||
|
||||
|
||||
class SendFunc(Protocol):
|
||||
@@ -546,18 +596,25 @@ class ProgressBar:
|
||||
class PipelineStageContext:
|
||||
"""Context information for the current pipeline stage."""
|
||||
|
||||
def __init__(self, stage_index: int, total_stages: int):
|
||||
def __init__(self, stage_index: int, total_stages: int, worker_id: Optional[str] = None):
|
||||
self.stage_index = stage_index
|
||||
self.total_stages = total_stages
|
||||
self.is_last_stage = (stage_index == total_stages - 1)
|
||||
self.worker_id = worker_id
|
||||
self.emits: List[Any] = []
|
||||
|
||||
def emit(self, obj: Any) -> None:
|
||||
"""Emit an object to the next pipeline stage."""
|
||||
self.emits.append(obj)
|
||||
|
||||
def get_current_command_text(self) -> str:
|
||||
"""Get the current command text (for backward compatibility)."""
|
||||
# This is maintained for backward compatibility with old code
|
||||
# In a real implementation, this would come from the stage context
|
||||
return ""
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage})"
|
||||
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage}, worker_id={self.worker_id})"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user