hj
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -231,5 +231,7 @@ MPV/portable_config/watch_later*
|
|||||||
hydrusnetwork
|
hydrusnetwork
|
||||||
.style.yapf
|
.style.yapf
|
||||||
.yapfignore
|
.yapfignore
|
||||||
|
tests/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
169
API/podcastindex.py
Normal file
169
API/podcastindex.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
"""PodcastIndex.org API integration.
|
||||||
|
|
||||||
|
Docs: https://podcastindex-org.github.io/docs-api/
|
||||||
|
|
||||||
|
Authentication headers required for most endpoints:
|
||||||
|
- User-Agent
|
||||||
|
- X-Auth-Key
|
||||||
|
- X-Auth-Date
|
||||||
|
- Authorization (sha1(apiKey + apiSecret + unixTime))
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .HTTP import HTTPClient
|
||||||
|
|
||||||
|
|
||||||
|
class PodcastIndexError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def build_auth_headers(
|
||||||
|
api_key: str,
|
||||||
|
api_secret: str,
|
||||||
|
*,
|
||||||
|
unix_time: Optional[int] = None,
|
||||||
|
user_agent: str = "downlow/1.0",
|
||||||
|
) -> Dict[str, str]:
|
||||||
|
"""Build PodcastIndex auth headers.
|
||||||
|
|
||||||
|
The API expects X-Auth-Date to be the current UTC unix epoch time
|
||||||
|
(integer string), and Authorization to be the SHA-1 hex digest of
|
||||||
|
`api_key + api_secret + X-Auth-Date`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
key = str(api_key or "").strip()
|
||||||
|
secret = str(api_secret or "").strip()
|
||||||
|
if not key or not secret:
|
||||||
|
raise PodcastIndexError("PodcastIndex api key/secret are required")
|
||||||
|
|
||||||
|
ts = int(unix_time if unix_time is not None else time.time())
|
||||||
|
ts_str = str(ts)
|
||||||
|
|
||||||
|
token = hashlib.sha1((key + secret + ts_str).encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"User-Agent": str(user_agent or "downlow/1.0"),
|
||||||
|
"X-Auth-Key": key,
|
||||||
|
"X-Auth-Date": ts_str,
|
||||||
|
"Authorization": token,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PodcastIndexClient:
|
||||||
|
BASE_URL = "https://api.podcastindex.org/api/1.0"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
api_key: str,
|
||||||
|
api_secret: str,
|
||||||
|
*,
|
||||||
|
user_agent: str = "downlow/1.0",
|
||||||
|
timeout: float = 30.0,
|
||||||
|
):
|
||||||
|
self.api_key = str(api_key or "").strip()
|
||||||
|
self.api_secret = str(api_secret or "").strip()
|
||||||
|
self.user_agent = str(user_agent or "downlow/1.0")
|
||||||
|
self.timeout = float(timeout)
|
||||||
|
|
||||||
|
if not self.api_key or not self.api_secret:
|
||||||
|
raise PodcastIndexError("PodcastIndex api key/secret are required")
|
||||||
|
|
||||||
|
def _get(self, path: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||||
|
url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
|
||||||
|
headers = build_auth_headers(
|
||||||
|
self.api_key,
|
||||||
|
self.api_secret,
|
||||||
|
user_agent=self.user_agent,
|
||||||
|
)
|
||||||
|
|
||||||
|
with HTTPClient(timeout=self.timeout, headers=headers) as client:
|
||||||
|
response = client.get(url, params=params)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
try:
|
||||||
|
return json.loads(response.content.decode("utf-8"))
|
||||||
|
except Exception as exc:
|
||||||
|
raise PodcastIndexError(f"Invalid JSON response: {exc}")
|
||||||
|
|
||||||
|
def search_byterm(self, query: str, *, max_results: int = 10) -> List[Dict[str, Any]]:
|
||||||
|
q = str(query or "").strip()
|
||||||
|
if not q:
|
||||||
|
return []
|
||||||
|
|
||||||
|
max_int = int(max_results)
|
||||||
|
if max_int < 1:
|
||||||
|
max_int = 1
|
||||||
|
if max_int > 1000:
|
||||||
|
max_int = 1000
|
||||||
|
|
||||||
|
data = self._get(
|
||||||
|
"search/byterm",
|
||||||
|
params={
|
||||||
|
"q": q,
|
||||||
|
"max": max_int,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
feeds = data.get("feeds")
|
||||||
|
return feeds if isinstance(feeds, list) else []
|
||||||
|
|
||||||
|
def episodes_byfeedid(self, feed_id: int | str, *, max_results: int = 50) -> List[Dict[str, Any]]:
|
||||||
|
"""List recent episodes for a feed by its PodcastIndex feed id."""
|
||||||
|
try:
|
||||||
|
feed_id_int = int(feed_id)
|
||||||
|
except Exception:
|
||||||
|
feed_id_int = None
|
||||||
|
if feed_id_int is None or feed_id_int <= 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
max_int = int(max_results)
|
||||||
|
if max_int < 1:
|
||||||
|
max_int = 1
|
||||||
|
if max_int > 1000:
|
||||||
|
max_int = 1000
|
||||||
|
|
||||||
|
data = self._get(
|
||||||
|
"episodes/byfeedid",
|
||||||
|
params={
|
||||||
|
"id": feed_id_int,
|
||||||
|
"max": max_int,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
items = data.get("items")
|
||||||
|
if isinstance(items, list):
|
||||||
|
return items
|
||||||
|
episodes = data.get("episodes")
|
||||||
|
return episodes if isinstance(episodes, list) else []
|
||||||
|
|
||||||
|
def episodes_byfeedurl(self, feed_url: str, *, max_results: int = 50) -> List[Dict[str, Any]]:
|
||||||
|
"""List recent episodes for a feed by its RSS URL."""
|
||||||
|
url = str(feed_url or "").strip()
|
||||||
|
if not url:
|
||||||
|
return []
|
||||||
|
|
||||||
|
max_int = int(max_results)
|
||||||
|
if max_int < 1:
|
||||||
|
max_int = 1
|
||||||
|
if max_int > 1000:
|
||||||
|
max_int = 1000
|
||||||
|
|
||||||
|
data = self._get(
|
||||||
|
"episodes/byfeedurl",
|
||||||
|
params={
|
||||||
|
"url": url,
|
||||||
|
"max": max_int,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
items = data.get("items")
|
||||||
|
if isinstance(items, list):
|
||||||
|
return items
|
||||||
|
episodes = data.get("episodes")
|
||||||
|
return episodes if isinstance(episodes, list) else []
|
||||||
462
CLI.py
462
CLI.py
@@ -18,7 +18,7 @@ import time
|
|||||||
import uuid
|
import uuid
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, TextIO, cast
|
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, TextIO, Tuple, cast
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from prompt_toolkit import PromptSession
|
from prompt_toolkit import PromptSession
|
||||||
@@ -135,6 +135,266 @@ class SelectionSyntax:
|
|||||||
return indices if indices else None
|
return indices if indices else None
|
||||||
|
|
||||||
|
|
||||||
|
class SelectionFilterSyntax:
|
||||||
|
"""Parses and applies @"COL:filter" selection filters.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- CLI tokenization (shlex) strips quotes, so a user input of `@"TITLE:foo"`
|
||||||
|
arrives as `@TITLE:foo`. We support both forms.
|
||||||
|
- Filters apply to the *current selectable table items* (in-memory), not to
|
||||||
|
provider searches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_OP_RE = re.compile(r"^(>=|<=|!=|==|>|<|=)\s*(.+)$")
|
||||||
|
_DUR_TOKEN_RE = re.compile(r"(?i)(\d+)\s*([hms])")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def parse(token: str) -> Optional[List[Tuple[str, str]]]:
|
||||||
|
"""Return list of (column, raw_expression) or None when not a filter token."""
|
||||||
|
|
||||||
|
if not token or not str(token).startswith("@"):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if token.strip() == "@*":
|
||||||
|
return None
|
||||||
|
|
||||||
|
# If this is a concrete numeric selection (@2, @1-3, @{1,3}), do not treat it as a filter.
|
||||||
|
try:
|
||||||
|
if SelectionSyntax.parse(str(token)) is not None:
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
raw = str(token)[1:].strip()
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# If quotes survived tokenization, strip a single symmetric wrapper.
|
||||||
|
if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
|
||||||
|
raw = raw[1:-1].strip()
|
||||||
|
|
||||||
|
# Shorthand: @"foo" means Title contains "foo".
|
||||||
|
if ":" not in raw:
|
||||||
|
if raw:
|
||||||
|
return [("Title", raw)]
|
||||||
|
return None
|
||||||
|
|
||||||
|
parts = [p.strip() for p in raw.split(",") if p.strip()]
|
||||||
|
conditions: List[Tuple[str, str]] = []
|
||||||
|
for part in parts:
|
||||||
|
if ":" not in part:
|
||||||
|
return None
|
||||||
|
col, expr = part.split(":", 1)
|
||||||
|
col = str(col or "").strip()
|
||||||
|
expr = str(expr or "").strip()
|
||||||
|
if not col:
|
||||||
|
return None
|
||||||
|
conditions.append((col, expr))
|
||||||
|
|
||||||
|
return conditions if conditions else None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _norm_key(text: str) -> str:
|
||||||
|
return re.sub(r"\s+", " ", str(text or "").strip().lower())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _item_column_map(item: Any) -> Dict[str, str]:
|
||||||
|
out: Dict[str, str] = {}
|
||||||
|
|
||||||
|
def _set(k: Any, v: Any) -> None:
|
||||||
|
key = SelectionFilterSyntax._norm_key(str(k or ""))
|
||||||
|
if not key:
|
||||||
|
return
|
||||||
|
if v is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
if isinstance(v, (list, tuple, set)):
|
||||||
|
text = ", ".join(str(x) for x in v if x is not None)
|
||||||
|
else:
|
||||||
|
text = str(v)
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
out[key] = text
|
||||||
|
|
||||||
|
if isinstance(item, dict):
|
||||||
|
# Display columns (primary UX surface)
|
||||||
|
cols = item.get("columns")
|
||||||
|
if isinstance(cols, list):
|
||||||
|
for pair in cols:
|
||||||
|
try:
|
||||||
|
if isinstance(pair, (list, tuple)) and len(pair) == 2:
|
||||||
|
_set(pair[0], pair[1])
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
# Direct keys as fallback
|
||||||
|
for k, v in item.items():
|
||||||
|
if k == "columns":
|
||||||
|
continue
|
||||||
|
_set(k, v)
|
||||||
|
else:
|
||||||
|
cols = getattr(item, "columns", None)
|
||||||
|
if isinstance(cols, list):
|
||||||
|
for pair in cols:
|
||||||
|
try:
|
||||||
|
if isinstance(pair, (list, tuple)) and len(pair) == 2:
|
||||||
|
_set(pair[0], pair[1])
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
for k in ("title", "path", "detail", "provider", "store", "table"):
|
||||||
|
try:
|
||||||
|
_set(k, getattr(item, k, None))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_duration_seconds(text: str) -> Optional[int]:
|
||||||
|
s = str(text or "").strip()
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if s.isdigit():
|
||||||
|
try:
|
||||||
|
return max(0, int(s))
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# clock format: M:SS or H:MM:SS
|
||||||
|
if ":" in s:
|
||||||
|
parts = [p.strip() for p in s.split(":")]
|
||||||
|
if len(parts) == 2 and all(p.isdigit() for p in parts):
|
||||||
|
m, sec = parts
|
||||||
|
return max(0, int(m) * 60 + int(sec))
|
||||||
|
if len(parts) == 3 and all(p.isdigit() for p in parts):
|
||||||
|
h, m, sec = parts
|
||||||
|
return max(0, int(h) * 3600 + int(m) * 60 + int(sec))
|
||||||
|
|
||||||
|
# token format: 1h2m3s (tokens can appear in any combination)
|
||||||
|
total = 0
|
||||||
|
found = False
|
||||||
|
for m in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
|
||||||
|
found = True
|
||||||
|
n = int(m.group(1))
|
||||||
|
unit = m.group(2).lower()
|
||||||
|
if unit == "h":
|
||||||
|
total += n * 3600
|
||||||
|
elif unit == "m":
|
||||||
|
total += n * 60
|
||||||
|
elif unit == "s":
|
||||||
|
total += n
|
||||||
|
if found:
|
||||||
|
return max(0, int(total))
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_float(text: str) -> Optional[float]:
|
||||||
|
s = str(text or "").strip()
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
s = s.replace(",", "")
|
||||||
|
try:
|
||||||
|
return float(s)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_op(expr: str) -> tuple[Optional[str], str]:
|
||||||
|
text = str(expr or "").strip()
|
||||||
|
if not text:
|
||||||
|
return None, ""
|
||||||
|
m = SelectionFilterSyntax._OP_RE.match(text)
|
||||||
|
if not m:
|
||||||
|
return None, text
|
||||||
|
return m.group(1), str(m.group(2) or "").strip()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def matches(item: Any, conditions: List[Tuple[str, str]]) -> bool:
|
||||||
|
colmap = SelectionFilterSyntax._item_column_map(item)
|
||||||
|
|
||||||
|
for col, expr in conditions:
|
||||||
|
key = SelectionFilterSyntax._norm_key(col)
|
||||||
|
actual = colmap.get(key)
|
||||||
|
|
||||||
|
# Convenience aliases for common UX names.
|
||||||
|
if actual is None:
|
||||||
|
if key == "duration":
|
||||||
|
actual = colmap.get("duration")
|
||||||
|
elif key == "title":
|
||||||
|
actual = colmap.get("title")
|
||||||
|
|
||||||
|
if actual is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
op, rhs = SelectionFilterSyntax._parse_op(expr)
|
||||||
|
left_text = str(actual or "").strip()
|
||||||
|
right_text = str(rhs or "").strip()
|
||||||
|
|
||||||
|
if op is None:
|
||||||
|
if not right_text:
|
||||||
|
return False
|
||||||
|
if right_text.lower() not in left_text.lower():
|
||||||
|
return False
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Comparator: try duration parsing first when it looks time-like.
|
||||||
|
prefer_duration = (
|
||||||
|
key == "duration"
|
||||||
|
or any(ch in right_text for ch in (":", "h", "m", "s"))
|
||||||
|
or any(ch in left_text for ch in (":", "h", "m", "s"))
|
||||||
|
)
|
||||||
|
|
||||||
|
left_num: Optional[float] = None
|
||||||
|
right_num: Optional[float] = None
|
||||||
|
|
||||||
|
if prefer_duration:
|
||||||
|
ldur = SelectionFilterSyntax._parse_duration_seconds(left_text)
|
||||||
|
rdur = SelectionFilterSyntax._parse_duration_seconds(right_text)
|
||||||
|
if ldur is not None and rdur is not None:
|
||||||
|
left_num = float(ldur)
|
||||||
|
right_num = float(rdur)
|
||||||
|
|
||||||
|
if left_num is None or right_num is None:
|
||||||
|
left_num = SelectionFilterSyntax._parse_float(left_text)
|
||||||
|
right_num = SelectionFilterSyntax._parse_float(right_text)
|
||||||
|
|
||||||
|
if left_num is not None and right_num is not None:
|
||||||
|
if op in ("=", "=="):
|
||||||
|
if not (left_num == right_num):
|
||||||
|
return False
|
||||||
|
elif op == "!=":
|
||||||
|
if not (left_num != right_num):
|
||||||
|
return False
|
||||||
|
elif op == ">":
|
||||||
|
if not (left_num > right_num):
|
||||||
|
return False
|
||||||
|
elif op == ">=":
|
||||||
|
if not (left_num >= right_num):
|
||||||
|
return False
|
||||||
|
elif op == "<":
|
||||||
|
if not (left_num < right_num):
|
||||||
|
return False
|
||||||
|
elif op == "<=":
|
||||||
|
if not (left_num <= right_num):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Fallback to string equality for =/!= when numeric parsing fails.
|
||||||
|
if op in ("=", "=="):
|
||||||
|
if left_text.lower() != right_text.lower():
|
||||||
|
return False
|
||||||
|
elif op == "!=":
|
||||||
|
if left_text.lower() == right_text.lower():
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class WorkerOutputMirror(io.TextIOBase):
|
class WorkerOutputMirror(io.TextIOBase):
|
||||||
"""Mirror stdout/stderr to worker manager while preserving console output."""
|
"""Mirror stdout/stderr to worker manager while preserving console output."""
|
||||||
|
|
||||||
@@ -1325,6 +1585,7 @@ class CmdletExecutor:
|
|||||||
filtered_args: List[str] = []
|
filtered_args: List[str] = []
|
||||||
selected_indices: List[int] = []
|
selected_indices: List[int] = []
|
||||||
select_all = False
|
select_all = False
|
||||||
|
selection_filters: List[List[Tuple[str, str]]] = []
|
||||||
|
|
||||||
value_flags: Set[str] = set()
|
value_flags: Set[str] = set()
|
||||||
try:
|
try:
|
||||||
@@ -1357,9 +1618,10 @@ class CmdletExecutor:
|
|||||||
filtered_args.append(arg)
|
filtered_args.append(arg)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if len(arg) >= 2 and arg[1] in {'"',
|
# Universal selection filter: @"COL:expr" (quotes may be stripped by tokenization)
|
||||||
"'"}:
|
filter_spec = SelectionFilterSyntax.parse(arg)
|
||||||
filtered_args.append(arg[1:].strip("\"'"))
|
if filter_spec is not None:
|
||||||
|
selection_filters.append(filter_spec)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if arg.strip() == "@*":
|
if arg.strip() == "@*":
|
||||||
@@ -1384,15 +1646,27 @@ class CmdletExecutor:
|
|||||||
# Piping should require `|` (or an explicit @ selection).
|
# Piping should require `|` (or an explicit @ selection).
|
||||||
piped_items = ctx.get_last_result_items()
|
piped_items = ctx.get_last_result_items()
|
||||||
result: Any = None
|
result: Any = None
|
||||||
if piped_items and (select_all or selected_indices):
|
effective_selected_indices: List[int] = []
|
||||||
if select_all:
|
if piped_items and (select_all or selected_indices or selection_filters):
|
||||||
result = piped_items
|
candidate_idxs = list(range(len(piped_items)))
|
||||||
else:
|
for spec in selection_filters:
|
||||||
result = [
|
candidate_idxs = [
|
||||||
piped_items[idx] for idx in selected_indices
|
i for i in candidate_idxs
|
||||||
if 0 <= idx < len(piped_items)
|
if SelectionFilterSyntax.matches(piped_items[i], spec)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if select_all:
|
||||||
|
effective_selected_indices = list(candidate_idxs)
|
||||||
|
elif selected_indices:
|
||||||
|
effective_selected_indices = [
|
||||||
|
candidate_idxs[i] for i in selected_indices
|
||||||
|
if 0 <= i < len(candidate_idxs)
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
effective_selected_indices = list(candidate_idxs)
|
||||||
|
|
||||||
|
result = [piped_items[i] for i in effective_selected_indices]
|
||||||
|
|
||||||
worker_manager = WorkerManagerRegistry.ensure(config)
|
worker_manager = WorkerManagerRegistry.ensure(config)
|
||||||
stage_session = WorkerStages.begin_stage(
|
stage_session = WorkerStages.begin_stage(
|
||||||
worker_manager,
|
worker_manager,
|
||||||
@@ -1438,7 +1712,7 @@ class CmdletExecutor:
|
|||||||
stage_status = "completed"
|
stage_status = "completed"
|
||||||
stage_error = ""
|
stage_error = ""
|
||||||
|
|
||||||
ctx.set_last_selection(selected_indices)
|
ctx.set_last_selection(effective_selected_indices)
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
if hasattr(ctx, "set_current_cmdlet_name"):
|
if hasattr(ctx, "set_current_cmdlet_name"):
|
||||||
@@ -2356,6 +2630,9 @@ class PipelineExecutor:
|
|||||||
elif table_type == "internetarchive":
|
elif table_type == "internetarchive":
|
||||||
print("Auto-loading Internet Archive item via download-file")
|
print("Auto-loading Internet Archive item via download-file")
|
||||||
stages.append(["download-file"])
|
stages.append(["download-file"])
|
||||||
|
elif table_type == "podcastindex.episodes":
|
||||||
|
print("Auto-piping selection to download-file")
|
||||||
|
stages.append(["download-file"])
|
||||||
elif table_type in {"soulseek",
|
elif table_type in {"soulseek",
|
||||||
"openlibrary",
|
"openlibrary",
|
||||||
"libgen"}:
|
"libgen"}:
|
||||||
@@ -2397,6 +2674,14 @@ class PipelineExecutor:
|
|||||||
"Auto-inserting download-file after Internet Archive selection"
|
"Auto-inserting download-file after Internet Archive selection"
|
||||||
)
|
)
|
||||||
stages.insert(0, ["download-file"])
|
stages.insert(0, ["download-file"])
|
||||||
|
if table_type == "podcastindex.episodes" and first_cmd not in (
|
||||||
|
"download-file",
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
print("Auto-inserting download-file after PodcastIndex episode selection")
|
||||||
|
stages.insert(0, ["download-file"])
|
||||||
if table_type == "libgen" and first_cmd not in (
|
if table_type == "libgen" and first_cmd not in (
|
||||||
"download-file",
|
"download-file",
|
||||||
"download-media",
|
"download-media",
|
||||||
@@ -2614,7 +2899,8 @@ class PipelineExecutor:
|
|||||||
if not stage_tokens:
|
if not stage_tokens:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cmd_name = stage_tokens[0].replace("_", "-").lower()
|
raw_stage_name = str(stage_tokens[0])
|
||||||
|
cmd_name = raw_stage_name.replace("_", "-").lower()
|
||||||
stage_args = stage_tokens[1:]
|
stage_args = stage_tokens[1:]
|
||||||
|
|
||||||
if cmd_name == "@":
|
if cmd_name == "@":
|
||||||
@@ -2676,12 +2962,14 @@ class PipelineExecutor:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if cmd_name.startswith("@"): # selection stage
|
if cmd_name.startswith("@"): # selection stage
|
||||||
selection = SelectionSyntax.parse(cmd_name)
|
selection_token = raw_stage_name
|
||||||
is_select_all = cmd_name == "@*"
|
selection = SelectionSyntax.parse(selection_token)
|
||||||
if selection is None and not is_select_all:
|
filter_spec = SelectionFilterSyntax.parse(selection_token)
|
||||||
print(f"Invalid selection: {cmd_name}\n")
|
is_select_all = selection_token.strip() == "@*"
|
||||||
|
if selection is None and filter_spec is None and not is_select_all:
|
||||||
|
print(f"Invalid selection: {selection_token}\n")
|
||||||
pipeline_status = "failed"
|
pipeline_status = "failed"
|
||||||
pipeline_error = f"Invalid selection {cmd_name}"
|
pipeline_error = f"Invalid selection {selection_token}"
|
||||||
return
|
return
|
||||||
|
|
||||||
selected_indices = []
|
selected_indices = []
|
||||||
@@ -2715,6 +3003,11 @@ class PipelineExecutor:
|
|||||||
|
|
||||||
if is_select_all:
|
if is_select_all:
|
||||||
selected_indices = list(range(len(items_list)))
|
selected_indices = list(range(len(items_list)))
|
||||||
|
elif filter_spec is not None:
|
||||||
|
selected_indices = [
|
||||||
|
i for i, item in enumerate(items_list)
|
||||||
|
if SelectionFilterSyntax.matches(item, filter_spec)
|
||||||
|
]
|
||||||
else:
|
else:
|
||||||
selected_indices = sorted(
|
selected_indices = sorted(
|
||||||
[i - 1 for i in selection]
|
[i - 1 for i in selection]
|
||||||
@@ -2731,6 +3024,52 @@ class PipelineExecutor:
|
|||||||
pipeline_error = "Empty selection"
|
pipeline_error = "Empty selection"
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Filter UX: if the stage token is a filter and it's terminal,
|
||||||
|
# render a filtered table overlay rather than selecting/auto-downloading.
|
||||||
|
stage_is_last = (stage_index + 1 >= len(stages))
|
||||||
|
if filter_spec is not None and stage_is_last:
|
||||||
|
try:
|
||||||
|
from SYS.result_table import ResultTable
|
||||||
|
|
||||||
|
base_table = stage_table
|
||||||
|
if base_table is None:
|
||||||
|
base_table = ctx.get_last_result_table()
|
||||||
|
|
||||||
|
if base_table is not None and hasattr(base_table, "copy_with_title"):
|
||||||
|
new_table = base_table.copy_with_title(getattr(base_table, "title", "") or "Results")
|
||||||
|
else:
|
||||||
|
new_table = ResultTable(getattr(base_table, "title", "") if base_table is not None else "Results")
|
||||||
|
|
||||||
|
try:
|
||||||
|
if base_table is not None and getattr(base_table, "table", None):
|
||||||
|
new_table.set_table(str(getattr(base_table, "table")))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Attach a one-line header so users see the active filter.
|
||||||
|
safe = str(selection_token)[1:].strip()
|
||||||
|
new_table.set_header_line(f'filter: "{safe}"')
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
for item in filtered:
|
||||||
|
new_table.add_result(item)
|
||||||
|
|
||||||
|
try:
|
||||||
|
ctx.set_last_result_table_overlay(new_table, items=list(filtered), subject=ctx.get_last_result_subject())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
stdout_console().print()
|
||||||
|
stdout_console().print(new_table)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
continue
|
||||||
|
|
||||||
# UX: selecting a single URL row from get-url tables should open it.
|
# UX: selecting a single URL row from get-url tables should open it.
|
||||||
# Only do this when the selection stage is terminal to avoid surprising
|
# Only do this when the selection stage is terminal to avoid surprising
|
||||||
# side-effects in pipelines like `@1 | download-file`.
|
# side-effects in pipelines like `@1 | download-file`.
|
||||||
@@ -2747,10 +3086,10 @@ class PipelineExecutor:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if PipelineExecutor._maybe_run_class_selector(
|
if PipelineExecutor._maybe_run_class_selector(
|
||||||
ctx,
|
ctx,
|
||||||
config,
|
config,
|
||||||
filtered,
|
filtered,
|
||||||
stage_is_last=(stage_index + 1 >= len(stages))):
|
stage_is_last=(stage_index + 1 >= len(stages))):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Special case: selecting multiple tags from get-tag and piping into delete-tag
|
# Special case: selecting multiple tags from get-tag and piping into delete-tag
|
||||||
@@ -2835,9 +3174,82 @@ class PipelineExecutor:
|
|||||||
if current_table and hasattr(current_table,
|
if current_table and hasattr(current_table,
|
||||||
"table") else None
|
"table") else None
|
||||||
)
|
)
|
||||||
if table_type == "youtube" and stage_index + 1 >= len(stages):
|
|
||||||
print("Auto-running YouTube selection via download-media")
|
def _norm_stage_cmd(name: Any) -> str:
|
||||||
stages.append(["download-media", *stage_args])
|
return str(name or "").replace("_", "-").strip().lower()
|
||||||
|
|
||||||
|
next_cmd = None
|
||||||
|
if stage_index + 1 < len(stages) and stages[stage_index + 1]:
|
||||||
|
next_cmd = _norm_stage_cmd(stages[stage_index + 1][0])
|
||||||
|
|
||||||
|
# Auto-insert downloader stages for provider tables.
|
||||||
|
# IMPORTANT: do not auto-download for filter selections; they may match many rows.
|
||||||
|
if filter_spec is None:
|
||||||
|
if stage_index + 1 >= len(stages):
|
||||||
|
if table_type == "youtube":
|
||||||
|
print("Auto-running YouTube selection via download-media")
|
||||||
|
stages.append(["download-media", *stage_args])
|
||||||
|
elif table_type == "bandcamp":
|
||||||
|
print("Auto-running Bandcamp selection via download-media")
|
||||||
|
stages.append(["download-media"])
|
||||||
|
elif table_type == "internetarchive":
|
||||||
|
print("Auto-loading Internet Archive item via download-file")
|
||||||
|
stages.append(["download-file"])
|
||||||
|
elif table_type == "podcastindex.episodes":
|
||||||
|
print("Auto-piping selection to download-file")
|
||||||
|
stages.append(["download-file"])
|
||||||
|
elif table_type in {"soulseek", "openlibrary", "libgen"}:
|
||||||
|
print("Auto-piping selection to download-file")
|
||||||
|
stages.append(["download-file"])
|
||||||
|
else:
|
||||||
|
if table_type == "soulseek" and next_cmd not in (
|
||||||
|
"download-file",
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
debug("Auto-inserting download-file after Soulseek selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-file"])
|
||||||
|
if table_type == "youtube" and next_cmd not in (
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
"download-file",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
debug("Auto-inserting download-media after YouTube selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-media"])
|
||||||
|
if table_type == "bandcamp" and next_cmd not in (
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
"download-file",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
print("Auto-inserting download-media after Bandcamp selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-media"])
|
||||||
|
if table_type == "internetarchive" and next_cmd not in (
|
||||||
|
"download-file",
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
debug("Auto-inserting download-file after Internet Archive selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-file"])
|
||||||
|
if table_type == "podcastindex.episodes" and next_cmd not in (
|
||||||
|
"download-file",
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
print("Auto-inserting download-file after PodcastIndex episode selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-file"])
|
||||||
|
if table_type == "libgen" and next_cmd not in (
|
||||||
|
"download-file",
|
||||||
|
"download-media",
|
||||||
|
"download_media",
|
||||||
|
".pipe",
|
||||||
|
):
|
||||||
|
print("Auto-inserting download-file after Libgen selection")
|
||||||
|
stages.insert(stage_index + 1, ["download-file"])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ensure_registry_loaded()
|
ensure_registry_loaded()
|
||||||
|
|||||||
468
Provider/podcastindex.py
Normal file
468
Provider/podcastindex.py
Normal file
@@ -0,0 +1,468 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from ProviderCore.base import Provider, SearchResult
|
||||||
|
from SYS.logger import log
|
||||||
|
|
||||||
|
|
||||||
|
def _get_podcastindex_credentials(config: Dict[str, Any]) -> Tuple[str, str]:
|
||||||
|
provider = config.get("provider")
|
||||||
|
if not isinstance(provider, dict):
|
||||||
|
return "", ""
|
||||||
|
|
||||||
|
entry = provider.get("podcastindex")
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
return "", ""
|
||||||
|
|
||||||
|
key = entry.get("key") or entry.get("Key") or entry.get("api_key")
|
||||||
|
secret = entry.get("secret") or entry.get("Secret") or entry.get("api_secret")
|
||||||
|
|
||||||
|
key_str = str(key or "").strip()
|
||||||
|
secret_str = str(secret or "").strip()
|
||||||
|
return key_str, secret_str
|
||||||
|
|
||||||
|
|
||||||
|
class PodcastIndex(Provider):
|
||||||
|
"""Search provider for PodcastIndex.org."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_duration(value: Any) -> str:
|
||||||
|
def _to_seconds(v: Any) -> Optional[int]:
|
||||||
|
if v is None:
|
||||||
|
return None
|
||||||
|
if isinstance(v, (int, float)):
|
||||||
|
try:
|
||||||
|
return max(0, int(v))
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
if isinstance(v, str):
|
||||||
|
text = v.strip()
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
if text.isdigit():
|
||||||
|
try:
|
||||||
|
return max(0, int(text))
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
# Accept common clock formats too.
|
||||||
|
if ":" in text:
|
||||||
|
parts = [p.strip() for p in text.split(":") if p.strip()]
|
||||||
|
if len(parts) == 2 and all(p.isdigit() for p in parts):
|
||||||
|
m, s = parts
|
||||||
|
return max(0, int(m) * 60 + int(s))
|
||||||
|
if len(parts) == 3 and all(p.isdigit() for p in parts):
|
||||||
|
h, m, s = parts
|
||||||
|
return max(0, int(h) * 3600 + int(m) * 60 + int(s))
|
||||||
|
return None
|
||||||
|
|
||||||
|
total = _to_seconds(value)
|
||||||
|
if total is None:
|
||||||
|
return "" if value is None else str(value).strip()
|
||||||
|
|
||||||
|
h = total // 3600
|
||||||
|
m = (total % 3600) // 60
|
||||||
|
s = total % 60
|
||||||
|
if h > 0:
|
||||||
|
return f"{h:d}h{m:d}m{s:d}s"
|
||||||
|
if m > 0:
|
||||||
|
return f"{m:d}m{s:d}s"
|
||||||
|
return f"{s:d}s"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_bytes(value: Any) -> str:
|
||||||
|
try:
|
||||||
|
n = int(value)
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
if n <= 0:
|
||||||
|
return ""
|
||||||
|
units = ["B", "KB", "MB", "GB", "TB"]
|
||||||
|
size = float(n)
|
||||||
|
unit = units[0]
|
||||||
|
for u in units:
|
||||||
|
unit = u
|
||||||
|
if size < 1024.0 or u == units[-1]:
|
||||||
|
break
|
||||||
|
size /= 1024.0
|
||||||
|
if unit == "B":
|
||||||
|
return f"{int(size)}{unit}"
|
||||||
|
return f"{size:.1f}{unit}"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_date_from_epoch(value: Any) -> str:
|
||||||
|
if value is None:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
ts = int(value)
|
||||||
|
if ts <= 0:
|
||||||
|
return ""
|
||||||
|
return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d")
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_episode_categories(ep: Dict[str, Any]) -> List[str]:
|
||||||
|
cats = ep.get("categories") or ep.get("category")
|
||||||
|
out: List[str] = []
|
||||||
|
|
||||||
|
if isinstance(cats, dict):
|
||||||
|
for v in cats.values():
|
||||||
|
if isinstance(v, str):
|
||||||
|
t = v.strip()
|
||||||
|
if t:
|
||||||
|
out.append(t)
|
||||||
|
elif isinstance(cats, list):
|
||||||
|
for v in cats:
|
||||||
|
if isinstance(v, str):
|
||||||
|
t = v.strip()
|
||||||
|
if t:
|
||||||
|
out.append(t)
|
||||||
|
elif isinstance(cats, str):
|
||||||
|
t = cats.strip()
|
||||||
|
if t:
|
||||||
|
out.append(t)
|
||||||
|
|
||||||
|
# Keep the table readable.
|
||||||
|
dedup: List[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for t in out:
|
||||||
|
low = t.lower()
|
||||||
|
if low in seen:
|
||||||
|
continue
|
||||||
|
seen.add(low)
|
||||||
|
dedup.append(t)
|
||||||
|
return dedup
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _looks_like_episode(item: Any) -> bool:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
return False
|
||||||
|
md = item.get("full_metadata")
|
||||||
|
if not isinstance(md, dict):
|
||||||
|
return False
|
||||||
|
enc = md.get("enclosureUrl") or md.get("enclosure_url")
|
||||||
|
if isinstance(enc, str) and enc.strip().startswith("http"):
|
||||||
|
return True
|
||||||
|
# Some pipelines may flatten episode fields.
|
||||||
|
enc2 = item.get("enclosureUrl") or item.get("url")
|
||||||
|
return isinstance(enc2, str) and enc2.strip().startswith("http")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _compute_sha256(filepath: Path) -> str:
|
||||||
|
h = hashlib.sha256()
|
||||||
|
with open(filepath, "rb") as f:
|
||||||
|
for chunk in iter(lambda: f.read(1024 * 1024), b""):
|
||||||
|
h.update(chunk)
|
||||||
|
return h.hexdigest()
|
||||||
|
|
||||||
|
def selector(
    self,
    selected_items: List[Any],
    *,
    ctx: Any,
    stage_is_last: bool = True,
    **_kwargs: Any,
) -> bool:
    """Handle `@` selection on podcastindex result tables.

    Only acts when this stage is the last in the pipeline.  Episode rows
    are downloaded; podcast rows are expanded into an episode table.
    """
    if not stage_is_last or not selected_items:
        return False

    # Episode selection (terminal): download episodes to temp/output dir.
    if all(self._looks_like_episode(entry) for entry in selected_items):
        return self._handle_episode_download_selection(selected_items, ctx)

    # Podcast selection (terminal): expand into episode list.
    return self._handle_podcast_expand_selection(selected_items, ctx)
|
||||||
|
|
||||||
|
def _handle_podcast_expand_selection(self, selected_items: List[Any], ctx: Any) -> bool:
    """Expand selected podcast feed rows into an episode result table.

    Looks up episodes for the first selected feed (by id when available,
    otherwise by feed URL), builds a ResultTable of them, publishes it to
    the pipeline context, and prints it.  Returns False when the
    selection or credentials are unusable, True otherwise (including on
    lookup failure, which is logged and swallowed).
    """
    chosen = [entry for entry in (selected_items or []) if isinstance(entry, dict)]
    if not chosen:
        return False

    key, secret = _get_podcastindex_credentials(self.config or {})
    if not key or not secret:
        return False

    # Resolve feed id/url from the selected podcast row.
    first = chosen[0]
    feed_md = first.get("full_metadata") if isinstance(first.get("full_metadata"), dict) else {}
    feed_title = str(first.get("title") or feed_md.get("title") or "Podcast").strip() or "Podcast"
    try:
        feed_id = int(feed_md.get("id")) if feed_md.get("id") is not None else None
    except Exception:
        feed_id = None
    feed_url = str(feed_md.get("url") or first.get("path") or "").strip()

    try:
        from API.podcastindex import PodcastIndexClient

        client = PodcastIndexClient(key, secret)
        if feed_id:
            episodes = client.episodes_byfeedid(feed_id, max_results=200)
        else:
            episodes = client.episodes_byfeedurl(feed_url, max_results=200)
    except Exception as exc:
        log(f"[podcastindex] episode lookup failed: {exc}", file=sys.stderr)
        return True

    try:
        from SYS.result_table import ResultTable
        from SYS.rich_display import stdout_console
    except Exception:
        return True

    table = ResultTable(f"PodcastIndex Episodes: {feed_title}").set_preserve_order(True)
    table.set_table("podcastindex.episodes")
    try:
        table.set_value_case("lower")
    except Exception:
        pass

    results_payload: List[Dict[str, Any]] = []
    for ep in episodes or []:
        if not isinstance(ep, dict):
            continue

        ep_title = str(ep.get("title") or "").strip() or "Unknown"
        enc_url = str(ep.get("enclosureUrl") or "").strip()
        page_url = str(ep.get("link") or "").strip()
        # Prefer the direct enclosure; fall back to the episode web page.
        audio_url = enc_url or page_url
        if not audio_url:
            continue

        duration = ep.get("duration")
        size_bytes = ep.get("enclosureLength") or ep.get("enclosure_length")
        published = ep.get("datePublished") or ep.get("datePublishedPretty")
        published_text = self._format_date_from_epoch(published) or str(published or "").strip()

        sr = SearchResult(
            table="podcastindex",
            title=ep_title,
            path=audio_url,
            detail=feed_title,
            media_kind="audio",
            size_bytes=int(size_bytes) if str(size_bytes or "").isdigit() else None,
            columns=[
                ("Title", ep_title),
                ("Date", published_text),
                ("Duration", self._format_duration(duration)),
                ("Size", self._format_bytes(size_bytes)),
                ("Url", audio_url),
            ],
            full_metadata={
                **dict(ep),
                "_feed": dict(feed_md) if isinstance(feed_md, dict) else {},
            },
        )
        table.add_result(sr)
        results_payload.append(sr.to_dict())

    # Publish the table to the pipeline context; best-effort.
    try:
        ctx.set_last_result_table(table, results_payload)
        ctx.set_current_stage_table(table)
    except Exception:
        pass

    try:
        stdout_console().print()
        stdout_console().print(table)
    except Exception:
        pass

    return True
|
||||||
|
|
||||||
|
def _handle_episode_download_selection(self, selected_items: List[Any], ctx: Any) -> bool:
    """Download the selected episode rows into the configured output dir.

    Each successfully downloaded file is hashed and turned into a
    pipeline payload (path/hash/tags/metadata) that is handed back to
    the context for downstream stages.  Returns False only when
    credentials are missing; individual download failures are logged
    and skipped.
    """
    key, secret = _get_podcastindex_credentials(self.config or {})
    if not key or not secret:
        return False

    try:
        from SYS.config import resolve_output_dir

        output_dir = resolve_output_dir(self.config or {})
    except Exception:
        output_dir = Path.home() / "Downloads"

    try:
        output_dir = Path(output_dir).expanduser()
        output_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    try:
        from SYS.download import _download_direct_file
    except Exception:
        return True

    payloads: List[Dict[str, Any]] = []
    downloaded = 0

    for item in selected_items:
        if not isinstance(item, dict):
            continue
        md = item.get("full_metadata") if isinstance(item.get("full_metadata"), dict) else {}
        enc_url = str(md.get("enclosureUrl") or item.get("url") or item.get("path") or "").strip()
        if not enc_url or not enc_url.startswith("http"):
            continue

        title_hint = str(item.get("title") or md.get("title") or "episode").strip() or "episode"

        try:
            result_obj = _download_direct_file(
                enc_url,
                Path(output_dir),
                quiet=False,
                suggested_filename=title_hint,
            )
        except Exception as exc:
            log(f"[podcastindex] download failed: {exc}", file=sys.stderr)
            continue

        # Download result objects have used several attribute names over
        # time; probe them in order of preference.
        downloaded_path = None
        for attr in ("filepath", "file_path", "path"):
            try:
                downloaded_path = getattr(result_obj, attr, None)
            except Exception:
                downloaded_path = None
            if downloaded_path is not None:
                break

        try:
            local_path = Path(str(downloaded_path))
        except Exception:
            local_path = None
        if local_path is None or not local_path.exists():
            continue

        try:
            sha256 = self._compute_sha256(local_path)
        except Exception:
            sha256 = ""

        tags: List[str] = [f"title:{title_hint}"]
        cats = self._extract_episode_categories(md) if isinstance(md, dict) else []
        # Cap category tags so one episode can't flood the tag list.
        tags.extend(f"tag:{c}" for c in cats[:10])

        payload: Dict[str, Any] = {
            "path": str(local_path),
            "hash": sha256,
            "title": title_hint,
            "action": "provider:podcastindex.selector",
            "download_mode": "file",
            "store": "local",
            "media_kind": "audio",
            "tag": tags,
            "provider": "podcastindex",
            "url": enc_url,
        }
        if isinstance(md, dict) and md:
            payload["full_metadata"] = dict(md)

        payloads.append(payload)
        downloaded += 1

    try:
        if payloads and hasattr(ctx, "set_last_result_items_only"):
            ctx.set_last_result_items_only(payloads)
    except Exception:
        pass

    if downloaded <= 0:
        return True

    try:
        from SYS.rich_display import stdout_console

        stdout_console().print(f"Downloaded {downloaded} episode(s) -> {output_dir}")
    except Exception:
        pass
    return True
|
||||||
|
|
||||||
|
def validate(self) -> bool:
    """True when PodcastIndex api key and secret are configured."""
    api_key, api_secret = _get_podcastindex_credentials(self.config or {})
    return bool(api_key) and bool(api_secret)
|
||||||
|
|
||||||
|
def search(
    self,
    query: str,
    limit: int = 10,
    filters: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[SearchResult]:
    """Search PodcastIndex feeds by term.

    Returns up to ``limit`` podcast rows; ``filters`` and extra kwargs
    are accepted for interface compatibility but ignored.  An empty list
    is returned when credentials are missing or the lookup fails.
    """
    _ = filters
    _ = kwargs

    key, secret = _get_podcastindex_credentials(self.config or {})
    if not key or not secret:
        return []

    try:
        from API.podcastindex import PodcastIndexClient

        client = PodcastIndexClient(key, secret)
        feeds = client.search_byterm(query, max_results=limit)
    except Exception as exc:
        log(f"[podcastindex] search failed: {exc}", file=sys.stderr)
        return []

    results: List[SearchResult] = []
    for feed in feeds[: max(0, int(limit))]:
        if not isinstance(feed, dict):
            continue

        title = str(feed.get("title") or "").strip() or "Unknown"
        author = str(feed.get("author") or feed.get("ownerName") or "").strip()
        feed_url = str(feed.get("url") or "").strip()
        site_url = str(feed.get("link") or "").strip()
        language = str(feed.get("language") or "").strip()

        # episodeCount may be an int, numeric string, or missing.
        episode_count_val = feed.get("episodeCount")
        episode_count = ""
        if episode_count_val is not None:
            try:
                episode_count = str(int(episode_count_val))
            except Exception:
                episode_count = str(episode_count_val).strip()

        path = feed_url or site_url or str(feed.get("id") or "").strip()

        results.append(
            SearchResult(
                table="podcastindex",
                title=title,
                path=path,
                detail=author,
                media_kind="audio",
                columns=[
                    ("Title", title),
                    ("Author", author),
                    ("Episodes", episode_count),
                    ("Lang", language),
                    ("Feed", feed_url),
                ],
                full_metadata=dict(feed),
            )
        )

    return results
|
||||||
@@ -25,6 +25,7 @@ from Provider.fileio import FileIO
|
|||||||
from Provider.zeroxzero import ZeroXZero
|
from Provider.zeroxzero import ZeroXZero
|
||||||
from Provider.loc import LOC
|
from Provider.loc import LOC
|
||||||
from Provider.internetarchive import InternetArchive
|
from Provider.internetarchive import InternetArchive
|
||||||
|
from Provider.podcastindex import PodcastIndex
|
||||||
|
|
||||||
_PROVIDERS: Dict[str,
|
_PROVIDERS: Dict[str,
|
||||||
Type[Provider]] = {
|
Type[Provider]] = {
|
||||||
@@ -38,6 +39,7 @@ _PROVIDERS: Dict[str,
|
|||||||
"youtube": YouTube,
|
"youtube": YouTube,
|
||||||
"telegram": Telegram,
|
"telegram": Telegram,
|
||||||
"loc": LOC,
|
"loc": LOC,
|
||||||
|
"podcastindex": PodcastIndex,
|
||||||
# Upload-capable providers
|
# Upload-capable providers
|
||||||
"0x0": ZeroXZero,
|
"0x0": ZeroXZero,
|
||||||
"file.io": FileIO,
|
"file.io": FileIO,
|
||||||
|
|||||||
@@ -55,7 +55,10 @@ def suspend_live_progress():
|
|||||||
|
|
||||||
def _is_selectable_table(table: Any) -> bool:
|
def _is_selectable_table(table: Any) -> bool:
|
||||||
"""Return True when a table can be used for @ selection."""
|
"""Return True when a table can be used for @ selection."""
|
||||||
return bool(table) and not getattr(table, "no_choice", False)
|
# Avoid relying on truthiness for selectability.
|
||||||
|
# `ResultTable` can be falsey when it has 0 rows, but `@` selection/filtering
|
||||||
|
# should still be allowed when the backing `last_result_items` exist.
|
||||||
|
return table is not None and not getattr(table, "no_choice", False)
|
||||||
|
|
||||||
|
|
||||||
# Pipeline state container (prototype)
|
# Pipeline state container (prototype)
|
||||||
|
|||||||
@@ -363,5 +363,4 @@ class PipelineRunner:
|
|||||||
# Best-effort; don't break the pipeline runner
|
# Best-effort; don't break the pipeline runner
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Ensure module-level variables reflect restored state
|
|
||||||
ctx.sync_module_state(state)
|
|
||||||
|
|||||||
@@ -1,134 +0,0 @@
|
|||||||
# Bootstrapping the development environment
|
|
||||||
|
|
||||||
This project includes convenience scripts to create a Python virtual environment, install the package, and (optionally) create OS shortcuts.
|
|
||||||
|
|
||||||
Files:
|
|
||||||
- `scripts/bootstrap.ps1` — PowerShell script for Windows (creates venv, installs, optional Desktop/Start Menu shortcuts)
|
|
||||||
- `scripts/bootstrap.sh` — POSIX shell script (Linux/macOS) (creates venv, installs, optional desktop launcher)
|
|
||||||
|
|
||||||
Quick examples
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
# Create a .venv, install in editable mode and add a Desktop shortcut
|
|
||||||
powershell -ExecutionPolicy Bypass -File .\scripts\bootstrap.ps1 -Editable -CreateDesktopShortcut
|
|
||||||
|
|
||||||
# Use a specific python.exe and force overwrite
|
|
||||||
powershell -ExecutionPolicy Bypass -File .\scripts\bootstrap.ps1 -Python "C:\\Python39\\python.exe" -Force
|
|
||||||
```
|
|
||||||
|
|
||||||
Linux/macOS (bash):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Create a .venv and install the project in editable mode
|
|
||||||
./scripts/bootstrap.sh --editable
|
|
||||||
|
|
||||||
# Create a desktop entry (GNU/Linux)
|
|
||||||
./scripts/bootstrap.sh --editable --desktop
|
|
||||||
```
|
|
||||||
|
|
||||||
Notes
|
|
||||||
|
|
||||||
- On Windows you may need to run PowerShell with an appropriate ExecutionPolicy (example shows using `-ExecutionPolicy Bypass`).
|
|
||||||
- The scripts default to a venv directory named `.venv` in the repository root. Use `-VenvPath` (PowerShell) or `--venv` (bash) to choose a different directory.
|
|
||||||
- The scripts will also install Playwright browser binaries by default (Chromium only) after installing Python dependencies. Use `--no-playwright` (bash) or `-NoPlaywright` (PowerShell) to opt out, or `--playwright-browsers <list>` / `-PlaywrightBrowsers <list>` to request specific engines (comma-separated, or use `all` to install all engines).
|
|
||||||
- The scripts are intended to make day-to-day developer setup easy; tweak flags for your desired install mode (editable vs normal) and shortcut preferences.
|
|
||||||
|
|
||||||
## Deno — installed by bootstrap
|
|
||||||
|
|
||||||
The bootstrap scripts will automatically install Deno if it is not already present on the system. They use the official installers and attempt to add Deno's bin directory to the PATH for the current session. If the installer completes but `deno` is not available in your shell, restart your shell or add `$HOME/.deno/bin` (Windows: `%USERPROFILE%\\.deno\\bin`) to your PATH.
|
|
||||||
|
|
||||||
Opinionated behavior
|
|
||||||
|
|
||||||
Running `python ./scripts/bootstrap.py` is intentionally opinionated: it will create a local virtual environment at `./.venv` (repo root), install Python dependencies and the project into that venv, install Playwright browsers, install Deno, and write small launcher scripts in the project root:
|
|
||||||
|
|
||||||
- `mm` (POSIX shell)
|
|
||||||
- `mm.ps1` (PowerShell)
|
|
||||||
- `mm.bat` (Windows CMD)
|
|
||||||
|
|
||||||
These launchers prefer the local `./.venv` Python and console scripts so you can run the project with `./mm` or `mm.ps1` directly from the repo root.
|
|
||||||
|
|
||||||
- When installing in editable mode from a development checkout, the bootstrap will also add a small `.pth` file to the venv's `site-packages` pointing at the repository root. This ensures top-level scripts such as `CLI.py` are importable even when using PEP 660 editable wheels (avoids having to create an egg-link by hand).
|
|
||||||
|
|
||||||
Additionally, the setup helpers install a global `mm` launcher into your user bin so you can run `mm` from any shell session:
|
|
||||||
|
|
||||||
- POSIX: `~/.local/bin/mm` (created if missing; the script attempts to add `~/.local/bin` to `PATH` by updating `~/.profile` / shell RCs if required)
|
|
||||||
- Windows: `%USERPROFILE%\bin\mm.cmd` and `%USERPROFILE%\bin\mm.ps1` (created if missing; the script attempts to add the folder to your **User** PATH)
|
|
||||||
|
|
||||||
The scripts back up any existing `mm` shims before replacing them and will print actionable messages when a shell restart is required.
|
|
||||||
|
|
||||||
Debugging the global `mm` launcher
|
|
||||||
|
|
||||||
- POSIX: set MM_DEBUG=1 and run `mm` to print runtime diagnostics (resolved REPO, VENV, and Python import checks):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
MM_DEBUG=1 mm
|
|
||||||
```
|
|
||||||
|
|
||||||
- PowerShell: set and export `$env:MM_DEBUG='1'` then run `mm.ps1` or the installed `mm` shim:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
$env:MM_DEBUG = '1'
|
|
||||||
mm
|
|
||||||
```
|
|
||||||
|
|
||||||
- CMD: `set MM_DEBUG=1` then run `mm`.
|
|
||||||
|
|
||||||
These diagnostics help identify whether the global launcher is selecting the correct repository and virtual environment; please include the output when reporting launcher failures.
|
|
||||||
|
|
||||||
PowerShell (Windows):
|
|
||||||
```powershell
|
|
||||||
irm https://deno.land/install.ps1 | iex
|
|
||||||
```
|
|
||||||
|
|
||||||
Linux/macOS:
|
|
||||||
```bash
|
|
||||||
curl -fsSL https://deno.land/install.sh | sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Pinning a Deno version
|
|
||||||
|
|
||||||
You can pin a Deno release by setting the `DENO_VERSION` environment variable before running the bootstrap script. Examples:
|
|
||||||
|
|
||||||
PowerShell (Windows):
|
|
||||||
```powershell
|
|
||||||
$env:DENO_VERSION = 'v1.34.3'; .\scripts\bootstrap.ps1
|
|
||||||
```
|
|
||||||
|
|
||||||
POSIX (Linux/macOS):
|
|
||||||
```bash
|
|
||||||
DENO_VERSION=v1.34.3 ./scripts/bootstrap.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
If you'd like, I can also:
|
|
||||||
- Add a short README section in `readme.md` referencing this doc, or
|
|
||||||
- Add a small icon and polish Linux desktop entries with an icon path.
|
|
||||||
|
|
||||||
## Troubleshooting: urllib3 / urllib3-future conflicts ⚠️
|
|
||||||
|
|
||||||
On some environments a third-party package (for example `urllib3-future`) may
|
|
||||||
install a site-packages hook that interferes with the real `urllib3` package.
|
|
||||||
When this happens you might see errors like:
|
|
||||||
|
|
||||||
Error importing cmdlet 'get_tag': No module named 'urllib3.exceptions'
|
|
||||||
|
|
||||||
The bootstrap scripts now run a verification step after installing dependencies
|
|
||||||
and will stop if a broken `urllib3` is detected to avoid leaving you with a
|
|
||||||
partially broken venv.
|
|
||||||
|
|
||||||
Recommended fix (activate the venv first or use the venv python explicitly):
|
|
||||||
|
|
||||||
PowerShell / Windows (from repo root):
|
|
||||||
|
|
||||||
.venv\Scripts\python.exe -m pip uninstall urllib3-future -y
|
|
||||||
.venv\Scripts\python.exe -m pip install --upgrade --force-reinstall urllib3
|
|
||||||
.venv\Scripts\python.exe -m pip install niquests -U
|
|
||||||
|
|
||||||
POSIX (Linux/macOS):
|
|
||||||
|
|
||||||
.venv/bin/python -m pip uninstall urllib3-future -y
|
|
||||||
.venv/bin/python -m pip install --upgrade --force-reinstall urllib3
|
|
||||||
.venv/bin/python -m pip install niquests -U
|
|
||||||
|
|
||||||
If problems persist, re-run the bootstrap script after applying the fixes.
|
|
||||||
@@ -1,234 +0,0 @@
|
|||||||
# get-url Architecture & Flow
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The enhanced `get-url` command supports two modes:
|
|
||||||
|
|
||||||
```
|
|
||||||
get-url
|
|
||||||
├── SEARCH MODE (new)
|
|
||||||
│ └── -url "pattern"
|
|
||||||
│ ├── Normalize pattern (strip protocol, www)
|
|
||||||
│ ├── Search all stores
|
|
||||||
│ ├── Match URLs with wildcards
|
|
||||||
│ └── Return grouped results
|
|
||||||
│
|
|
||||||
└── ORIGINAL MODE (unchanged)
|
|
||||||
├── Hash lookup
|
|
||||||
├── Store lookup
|
|
||||||
└── Return URLs for file
|
|
||||||
```
|
|
||||||
|
|
||||||
## Flow Diagram: URL Search
|
|
||||||
|
|
||||||
```
|
|
||||||
User Input
|
|
||||||
│
|
|
||||||
v
|
|
||||||
get-url -url "youtube.com*"
|
|
||||||
│
|
|
||||||
v
|
|
||||||
_normalize_url_for_search()
|
|
||||||
│ Strips: https://, http://, www.
|
|
||||||
│ Result: "youtube.com*" (unchanged, already normalized)
|
|
||||||
v
|
|
||||||
_search_urls_across_stores()
|
|
||||||
│
|
|
||||||
├─→ Store 1 (Hydrus)
|
|
||||||
│ ├─→ search("*", limit=1000)
|
|
||||||
│ ├─→ get_url(file_hash) for each file
|
|
||||||
│ └─→ _match_url_pattern() for each URL
|
|
||||||
│
|
|
||||||
├─→ Store 2 (Folder)
|
|
||||||
│ ├─→ search("*", limit=1000)
|
|
||||||
│ ├─→ get_url(file_hash) for each file
|
|
||||||
│ └─→ _match_url_pattern() for each URL
|
|
||||||
│
|
|
||||||
└─→ ...more stores...
|
|
||||||
|
|
||||||
Matching URLs:
|
|
||||||
├─→ https://www.youtube.com/watch?v=123
|
|
||||||
├─→ http://youtube.com/shorts/abc
|
|
||||||
└─→ https://youtube.com/playlist?list=xyz
|
|
||||||
|
|
||||||
Normalized for matching:
|
|
||||||
├─→ youtube.com/watch?v=123 ✓ Matches "youtube.com*"
|
|
||||||
├─→ youtube.com/shorts/abc ✓ Matches "youtube.com*"
|
|
||||||
└─→ youtube.com/playlist?... ✓ Matches "youtube.com*"
|
|
||||||
|
|
||||||
v
|
|
||||||
Collect UrlItem results
|
|
||||||
│
|
|
||||||
├─→ UrlItem(url="https://www.youtube.com/watch?v=123",
|
|
||||||
│ hash="abcd1234...", store="hydrus")
|
|
||||||
│
|
|
||||||
├─→ UrlItem(url="http://youtube.com/shorts/abc",
|
|
||||||
│ hash="efgh5678...", store="folder")
|
|
||||||
│
|
|
||||||
└─→ ...more items...
|
|
||||||
|
|
||||||
v
|
|
||||||
Group by store
|
|
||||||
│
|
|
||||||
├─→ Hydrus
|
|
||||||
│ ├─→ https://www.youtube.com/watch?v=123
|
|
||||||
│ └─→ ...
|
|
||||||
│
|
|
||||||
└─→ Folder
|
|
||||||
├─→ http://youtube.com/shorts/abc
|
|
||||||
└─→ ...
|
|
||||||
|
|
||||||
v
|
|
||||||
Emit UrlItem objects for piping
|
|
||||||
│
|
|
||||||
v
|
|
||||||
Return exit code 0 (success)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Code Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
Get_Url (class)
|
|
||||||
│
|
|
||||||
├── __init__()
|
|
||||||
│ └── Register command with CLI
|
|
||||||
│
|
|
||||||
├── _normalize_url_for_search() [static]
|
|
||||||
│ └── Strip protocol & www, lowercase
|
|
||||||
│
|
|
||||||
├── _match_url_pattern() [static]
|
|
||||||
│ └── fnmatch with normalization
|
|
||||||
│
|
|
||||||
├── _search_urls_across_stores() [instance]
|
|
||||||
│ ├── Iterate stores
|
|
||||||
│ ├── Search files in store
|
|
||||||
│ ├── Get URLs for each file
|
|
||||||
│ ├── Apply pattern matching
|
|
||||||
│ └── Return (items, stores_found)
|
|
||||||
│
|
|
||||||
└── run() [main execution]
|
|
||||||
├── Check for -url flag
|
|
||||||
│ ├── YES: Search mode
|
|
||||||
│ │ └── _search_urls_across_stores()
|
|
||||||
│ └── NO: Original mode
|
|
||||||
│ └── Hash+store lookup
|
|
||||||
│
|
|
||||||
└── Return exit code
|
|
||||||
```
|
|
||||||
|
|
||||||
## Data Flow Examples
|
|
||||||
|
|
||||||
### Example 1: Search by Domain
|
|
||||||
```
|
|
||||||
Input: get-url -url "www.google.com"
|
|
||||||
|
|
||||||
Normalize: "google.com" (www. stripped)
|
|
||||||
|
|
||||||
Search Results:
|
|
||||||
Store "hydrus":
|
|
||||||
- https://www.google.com ✓
|
|
||||||
- https://google.com/search?q=hello ✓
|
|
||||||
- https://google.com/maps ✓
|
|
||||||
|
|
||||||
Store "folder":
|
|
||||||
- http://google.com ✓
|
|
||||||
- https://google.com/images ✓
|
|
||||||
|
|
||||||
Output: 5 matching URLs grouped by store
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example 2: Wildcard Pattern
|
|
||||||
```
|
|
||||||
Input: get-url -url "youtube.com/watch*"
|
|
||||||
|
|
||||||
Pattern: "youtube.com/watch*"
|
|
||||||
|
|
||||||
Search Results:
|
|
||||||
Store "hydrus":
|
|
||||||
- https://www.youtube.com/watch?v=123 ✓
|
|
||||||
- https://youtube.com/watch?list=abc ✓
|
|
||||||
- https://www.youtube.com/shorts/xyz ✗ (doesn't match /watch*)
|
|
||||||
|
|
||||||
Store "folder":
|
|
||||||
- http://youtube.com/watch?v=456 ✓
|
|
||||||
|
|
||||||
Output: 3 matching URLs (watch only, not shorts)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example 3: Subdomain Wildcard
|
|
||||||
```
|
|
||||||
Input: get-url -url "*.example.com*"
|
|
||||||
|
|
||||||
Normalize: "*.example.com*" (already normalized)
|
|
||||||
|
|
||||||
Search Results:
|
|
||||||
Store "hydrus":
|
|
||||||
- https://cdn.example.com/video.mp4 ✓
|
|
||||||
- https://api.example.com/endpoint ✓
|
|
||||||
- https://www.example.com ✓
|
|
||||||
- https://other.org ✗
|
|
||||||
|
|
||||||
Output: 3 matching URLs
|
|
||||||
```
|
|
||||||
|
|
||||||
## Integration with Piping
|
|
||||||
|
|
||||||
```
|
|
||||||
# Search → Filter → Add Tag
|
|
||||||
get-url -url "youtube.com*" | add-tag -tag "video-source"
|
|
||||||
|
|
||||||
# Search → Count
|
|
||||||
get-url -url "reddit.com*" | wc -l
|
|
||||||
|
|
||||||
# Search → Export
|
|
||||||
get-url -url "github.com*" > github_urls.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
## Error Handling Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
get-url -url "pattern"
|
|
||||||
│
|
|
||||||
├─→ No stores configured?
|
|
||||||
│ └─→ Log "Error: No stores configured"
|
|
||||||
│ └─→ Return exit code 1
|
|
||||||
│
|
|
||||||
├─→ Store search fails?
|
|
||||||
│ └─→ Log error, skip store, continue
|
|
||||||
│
|
|
||||||
├─→ No matches found?
|
|
||||||
│ └─→ Log "No urls matching pattern"
|
|
||||||
│ └─→ Return exit code 1
|
|
||||||
│
|
|
||||||
└─→ Matches found?
|
|
||||||
└─→ Return exit code 0
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Considerations
|
|
||||||
|
|
||||||
1. **Store Iteration**: Loops through all configured stores
|
|
||||||
2. **File Scanning**: Each store searches up to 1000 files
|
|
||||||
3. **URL Matching**: Each URL tested against pattern (fnmatch - O(n) per URL)
|
|
||||||
4. **Memory**: Stores all matching items in memory before display
|
|
||||||
|
|
||||||
Optimization opportunities:
|
|
||||||
- Cache store results
|
|
||||||
- Limit search scope with --store flag
|
|
||||||
- Early exit with --limit N
|
|
||||||
- Pagination support
|
|
||||||
|
|
||||||
## Backward Compatibility
|
|
||||||
|
|
||||||
Original mode (unchanged):
|
|
||||||
```
|
|
||||||
@1 | get-url
|
|
||||||
│
|
|
||||||
└─→ No -url flag
|
|
||||||
└─→ Use original logic
|
|
||||||
├─→ Get hash from result
|
|
||||||
├─→ Get store from result or args
|
|
||||||
├─→ Call backend.get_url(hash)
|
|
||||||
└─→ Return URLs for that file
|
|
||||||
```
|
|
||||||
|
|
||||||
All original functionality preserved. New -url flag is additive only.
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
# Quick Reference: get-url URL Search
|
|
||||||
|
|
||||||
## Basic Syntax
|
|
||||||
```bash
|
|
||||||
# Search mode (new)
|
|
||||||
get-url -url "pattern"
|
|
||||||
|
|
||||||
# Original mode (unchanged)
|
|
||||||
@1 | get-url
|
|
||||||
```
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
### Exact domain match
|
|
||||||
```bash
|
|
||||||
get-url -url "google.com"
|
|
||||||
```
|
|
||||||
Matches: `https://www.google.com`, `http://google.com/search`, `https://google.com/maps`
|
|
||||||
|
|
||||||
### YouTube URL search
|
|
||||||
```bash
|
|
||||||
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
|
|
||||||
```
|
|
||||||
Normalizes to: `youtube.com/watch?v=xx_88tdwmes`
|
|
||||||
Matches: Any video with same ID across different protocols
|
|
||||||
|
|
||||||
### Wildcard domain
|
|
||||||
```bash
|
|
||||||
get-url -url "youtube.com*"
|
|
||||||
```
|
|
||||||
Matches: All YouTube URLs (videos, shorts, playlists, etc.)
|
|
||||||
|
|
||||||
### Subdomain wildcard
|
|
||||||
```bash
|
|
||||||
get-url -url "*.example.com*"
|
|
||||||
```
|
|
||||||
Matches: `cdn.example.com`, `api.example.com`, `www.example.com`
|
|
||||||
|
|
||||||
### Specific path pattern
|
|
||||||
```bash
|
|
||||||
get-url -url "youtube.com/watch*"
|
|
||||||
```
|
|
||||||
Matches: Only YouTube watch URLs (not shorts or playlists)
|
|
||||||
|
|
||||||
### Single character wildcard
|
|
||||||
```bash
|
|
||||||
get-url -url "example.com/file?.mp4"
|
|
||||||
```
|
|
||||||
Matches: `example.com/file1.mp4`, `example.com/fileA.mp4` (not `file12.mp4`)
|
|
||||||
|
|
||||||
## How It Works
|
|
||||||
|
|
||||||
1. **Normalization**: Strips `https://`, `www.` prefix from pattern and all URLs
|
|
||||||
2. **Pattern Matching**: Uses `*` and `?` wildcards (case-insensitive)
|
|
||||||
3. **Search**: Scans all configured stores for matching URLs
|
|
||||||
4. **Results**: Groups matches by store, shows URL and hash
|
|
||||||
|
|
||||||
## Return Values
|
|
||||||
- Exit code **0** if matches found
|
|
||||||
- Exit code **1** if no matches or error
|
|
||||||
|
|
||||||
## Piping Results
|
|
||||||
```bash
|
|
||||||
get-url -url "youtube.com*" | grep -i video
|
|
||||||
get-url -url "example.com*" | add-tag -tag "external-source"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Common Patterns
|
|
||||||
|
|
||||||
| Pattern | Matches | Notes |
|
|
||||||
|---------|---------|-------|
|
|
||||||
| `google.com` | Google URLs | Exact domain (after normalization) |
|
|
||||||
| `youtube.com*` | All YouTube | Wildcard at end |
|
|
||||||
| `*.example.com*` | Subdomains | Wildcard at start and end |
|
|
||||||
| `github.com/user*` | User repos | Path pattern |
|
|
||||||
| `reddit.com/r/*` | Subreddit | Path with wildcard |
|
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
# get-url Enhanced URL Search
|
|
||||||
|
|
||||||
The `get-url` command now supports searching for URLs across all stores with automatic protocol and `www` prefix stripping.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
### 1. **Protocol Stripping**
|
|
||||||
URLs are normalized by removing:
|
|
||||||
- Protocol prefixes: `https://`, `http://`, `ftp://`, etc.
|
|
||||||
- `www.` prefix (case-insensitive)
|
|
||||||
|
|
||||||
### 2. **Wildcard Matching**
|
|
||||||
Patterns support standard wildcards:
|
|
||||||
- `*` - matches any sequence of characters
|
|
||||||
- `?` - matches any single character
|
|
||||||
|
|
||||||
### 3. **Case-Insensitive Matching**
|
|
||||||
All matching is case-insensitive for domains and paths
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Search by full domain
|
|
||||||
```bash
|
|
||||||
get-url -url "www.google.com"
|
|
||||||
# Matches:
|
|
||||||
# - https://www.google.com
|
|
||||||
# - http://google.com/search
|
|
||||||
# - https://google.com/maps
|
|
||||||
```
|
|
||||||
|
|
||||||
### Search with YouTube example
|
|
||||||
```bash
|
|
||||||
get-url -url "https://www.youtube.com/watch?v=xx_88TDWmEs"
|
|
||||||
# Becomes: youtube.com/watch?v=xx_88tdwmes
|
|
||||||
# Matches:
|
|
||||||
# - https://www.youtube.com/watch?v=xx_88TDWmEs
|
|
||||||
# - http://youtube.com/watch?v=xx_88TDWmEs
|
|
||||||
```
|
|
||||||
|
|
||||||
### Domain wildcard matching
|
|
||||||
```bash
|
|
||||||
get-url -url "youtube.com*"
|
|
||||||
# Matches any URL starting with youtube.com:
|
|
||||||
# - https://www.youtube.com/watch?v=123
|
|
||||||
# - https://youtube.com/shorts/abc
|
|
||||||
# - http://youtube.com/playlist?list=xyz
|
|
||||||
```
|
|
||||||
|
|
||||||
### Subdomain matching
|
|
||||||
```bash
|
|
||||||
get-url -url "*example.com*"
|
|
||||||
# Matches:
|
|
||||||
# - https://cdn.example.com/file.mp4
|
|
||||||
# - https://www.example.com
|
|
||||||
# - https://api.example.com/endpoint
|
|
||||||
```
|
|
||||||
|
|
||||||
### Specific path matching
|
|
||||||
```bash
|
|
||||||
get-url -url "youtube.com/watch*"
|
|
||||||
# Matches:
|
|
||||||
# - https://www.youtube.com/watch?v=123
|
|
||||||
# - http://youtube.com/watch?list=abc
|
|
||||||
# Does NOT match:
|
|
||||||
# - https://youtube.com/shorts/abc
|
|
||||||
```
|
|
||||||
|
|
||||||
## Get URLs for Specific File
|
|
||||||
|
|
||||||
The original functionality is still supported:
|
|
||||||
```bash
|
|
||||||
@1 | get-url
|
|
||||||
# Requires hash and store from piped result
|
|
||||||
```
|
|
||||||
|
|
||||||
## Output
|
|
||||||
|
|
||||||
Results are organized by store and show:
|
|
||||||
- **Store**: Backend name (hydrus, folder, etc.)
|
|
||||||
- **Url**: The full matched URL
|
|
||||||
- **Hash**: First 16 characters of the file hash (for compactness)
|
|
||||||
|
|
||||||
## Implementation Details
|
|
||||||
|
|
||||||
The search:
|
|
||||||
1. Iterates through all configured stores
|
|
||||||
2. Searches for all files in each store (limit 1000 per store)
|
|
||||||
3. Retrieves URLs for each file
|
|
||||||
4. Applies pattern matching with normalization
|
|
||||||
5. Returns results grouped by store
|
|
||||||
6. Emits `UrlItem` objects for piping to other commands
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
Known issues and brief remediation steps
|
|
||||||
|
|
||||||
- urllib3 / urllib3-future conflict
|
|
||||||
- Symptom: `No module named 'urllib3.exceptions'` or missing `urllib3.__version__`.
|
|
||||||
- Root cause: a `.pth` file or packaging hook from `urllib3-future` may mutate the
|
|
||||||
`urllib3` namespace in incompatible ways.
|
|
||||||
- Remediation: uninstall `urllib3-future`, reinstall `urllib3`, and re-install
|
|
||||||
`niquests` if required. See `docs/ISSUES/urllib3-future.md` for more details.
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
# Obtain cookies.txt for youtube.com
|
|
||||||
1. You need a google account, throwaway is fine
|
|
||||||
2. You need webbrowser extension Get cookies.txt LOCALLY
|
|
||||||
|
|
||||||
Chrome based browser: [cookies.txt LOCALLY](https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)
|
|
||||||
|
|
||||||
Firefox based browser: [cookies.txt LOCALLY](https://addons.mozilla.org/en-US/firefox/addon/get-cookies-txt-locally/)
|
|
||||||
|
|
||||||
3. open incognito tab and sign into youtube with your account
|
|
||||||
4. open extension and click on "export all cookies"
|
|
||||||
5. take the cookies.txt file that was produced and place it in the project folder
|
|
||||||
|
|
||||||
restart the medios-macina app and verify status for cookies is FOUND
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
1. open a shell prompt in a suitable location for hydrusnetwork, e.g. C:\hydrusnetwork
|
|
||||||
2. send command "git clone https://github.com/hydrusnetwork/hydrus"
|
|
||||||
3. send command "cd hydrus"
|
|
||||||
4. send command "python -m venv .venv"
|
|
||||||
|
|
||||||
---------------------------------------------------
|
|
||||||
5. Windows
|
|
||||||
1. send command ".\.venv\Scripts\Activate.ps1"
|
|
||||||
|
|
||||||
5. Linux
|
|
||||||
1. send command "source .venv/bin/activate"
|
|
||||||
|
|
||||||
--------------------------------------------------
|
|
||||||
your command line should now have (.venv) in front of it
|
|
||||||
|
|
||||||
5. send command "pip install -r requirements.txt"
|
|
||||||
6. send command "python hydrus_client.py"
|
|
||||||
---------------------------------------------------
|
|
||||||
the gui application should be opened now
|
|
||||||
7. in the top menu, click on services > manage services > double-click "client api"
|
|
||||||
8. check the boxes:
|
|
||||||
X run the client api?
|
|
||||||
X allow non-local connections
|
|
||||||
X supports CORS headers
|
|
||||||
click apply
|
|
||||||
|
|
||||||
9. click on services > review services > click on "client api"
|
|
||||||
10. click "Add" > manually > change "new api permissions" to "medios"
|
|
||||||
11. click apply > click "copy api access key", click "open client api base url"
|
|
||||||
|
|
||||||
--------------------------------------------
|
|
||||||
edit the below and place in your config.conf
|
|
||||||
|
|
||||||
<figure>
|
|
||||||
<figcaption>config.conf</figcaption>
|
|
||||||
<pre><code class="language-powershell">[store=hydrusnetwork]
|
|
||||||
NAME="shortnamenospacesorsymbols"
|
|
||||||
API="apiaccesskeygoeshere"
|
|
||||||
URL="apibaseurlgoeshere"
|
|
||||||
</code></pre>
|
|
||||||
</figure>
|
|
||||||
BIN
docs/img/hydrus/edit-service.png
Normal file
BIN
docs/img/hydrus/edit-service.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
Reference in New Issue
Block a user