f
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Set
|
from typing import Any, Dict, List, Optional, Set
|
||||||
|
|
||||||
|
|
||||||
def value_normalize(value: Any) -> str:
|
def value_normalize(value: Any) -> str:
|
||||||
@@ -19,6 +19,18 @@ def _add_tag(tags: List[str], namespace: str, value: str) -> None:
|
|||||||
tags.append(candidate)
|
tags.append(candidate)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_channel_from_tag(tag_value: str) -> Optional[str]:
|
||||||
|
"""Return the channel value if tag_value is namespaced with channel."""
|
||||||
|
if not tag_value:
|
||||||
|
return None
|
||||||
|
normalized = tag_value.strip().lower()
|
||||||
|
if not normalized.startswith("channel:"):
|
||||||
|
return None
|
||||||
|
_, _, remainder = normalized.partition(":")
|
||||||
|
remainder = remainder.strip()
|
||||||
|
return remainder or None
|
||||||
|
|
||||||
|
|
||||||
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
||||||
""" """
|
""" """
|
||||||
tags: List[str] = []
|
tags: List[str] = []
|
||||||
@@ -67,7 +79,12 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
|||||||
for tag_value in tags_field:
|
for tag_value in tags_field:
|
||||||
if tag_value:
|
if tag_value:
|
||||||
normalized = value_normalize(str(tag_value))
|
normalized = value_normalize(str(tag_value))
|
||||||
if normalized and normalized not in tags:
|
if not normalized:
|
||||||
|
continue
|
||||||
|
channel_candidate = _extract_channel_from_tag(normalized)
|
||||||
|
if channel_candidate:
|
||||||
|
_add_tag(tags, "channel", channel_candidate)
|
||||||
|
if normalized not in tags:
|
||||||
tags.append(normalized)
|
tags.append(normalized)
|
||||||
elif isinstance(tags_field, dict):
|
elif isinstance(tags_field, dict):
|
||||||
# Tags is dict: {"key": "val"} → tag:key:val
|
# Tags is dict: {"key": "val"} → tag:key:val
|
||||||
@@ -83,10 +100,16 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
|||||||
if tag_str:
|
if tag_str:
|
||||||
for tag_value in re.split(r'[,\s]+', tag_str):
|
for tag_value in re.split(r'[,\s]+', tag_str):
|
||||||
tag_value = tag_value.strip()
|
tag_value = tag_value.strip()
|
||||||
if tag_value:
|
if not tag_value:
|
||||||
normalized = value_normalize(tag_value)
|
continue
|
||||||
if normalized and normalized not in tags:
|
normalized = value_normalize(tag_value)
|
||||||
tags.append(normalized)
|
if not normalized:
|
||||||
|
continue
|
||||||
|
channel_candidate = _extract_channel_from_tag(normalized)
|
||||||
|
if channel_candidate:
|
||||||
|
_add_tag(tags, "channel", channel_candidate)
|
||||||
|
if normalized not in tags:
|
||||||
|
tags.append(normalized)
|
||||||
|
|
||||||
# Extract chapters as tags if present
|
# Extract chapters as tags if present
|
||||||
chapters = entry.get("chapters")
|
chapters = entry.get("chapters")
|
||||||
|
|||||||
@@ -454,6 +454,7 @@ class HydrusNetwork(Store):
|
|||||||
results = storage["hydrus"].search("Simple Man")
|
results = storage["hydrus"].search("Simple Man")
|
||||||
"""
|
"""
|
||||||
limit = kwargs.get("limit", 100)
|
limit = kwargs.get("limit", 100)
|
||||||
|
minimal = bool(kwargs.get("minimal", False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = self._client
|
client = self._client
|
||||||
@@ -518,6 +519,8 @@ class HydrusNetwork(Store):
|
|||||||
fetch_limit: int,
|
fetch_limit: int,
|
||||||
scan_limit: int | None = None,
|
scan_limit: int | None = None,
|
||||||
needles: Optional[Sequence[str]] = None,
|
needles: Optional[Sequence[str]] = None,
|
||||||
|
*,
|
||||||
|
minimal: bool = False,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
||||||
|
|
||||||
@@ -620,19 +623,19 @@ class HydrusNetwork(Store):
|
|||||||
payload = client.fetch_file_metadata(
|
payload = client.fetch_file_metadata(
|
||||||
hashes=chunk,
|
hashes=chunk,
|
||||||
include_file_url=True,
|
include_file_url=True,
|
||||||
include_service_keys_to_tags=True,
|
include_service_keys_to_tags=not minimal,
|
||||||
include_duration=True,
|
include_duration=not minimal,
|
||||||
include_size=True,
|
include_size=not minimal,
|
||||||
include_mime=True,
|
include_mime=not minimal,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
payload = client.fetch_file_metadata(
|
payload = client.fetch_file_metadata(
|
||||||
file_ids=chunk,
|
file_ids=chunk,
|
||||||
include_file_url=True,
|
include_file_url=True,
|
||||||
include_service_keys_to_tags=True,
|
include_service_keys_to_tags=not minimal,
|
||||||
include_duration=True,
|
include_duration=not minimal,
|
||||||
include_size=True,
|
include_size=not minimal,
|
||||||
include_mime=True,
|
include_mime=not minimal,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
@@ -739,12 +742,14 @@ class HydrusNetwork(Store):
|
|||||||
want_any=False,
|
want_any=False,
|
||||||
fetch_limit=int(limit) if limit else 100,
|
fetch_limit=int(limit) if limit else 100,
|
||||||
needles=pattern_hints,
|
needles=pattern_hints,
|
||||||
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
None,
|
None,
|
||||||
want_any=True,
|
want_any=True,
|
||||||
fetch_limit=int(limit) if limit else 100
|
fetch_limit=int(limit) if limit else 100,
|
||||||
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
def _clean_url_search_token(value: str | None) -> str:
|
def _clean_url_search_token(value: str | None) -> str:
|
||||||
@@ -792,10 +797,10 @@ class HydrusNetwork(Store):
|
|||||||
payload = client.fetch_file_metadata(
|
payload = client.fetch_file_metadata(
|
||||||
file_ids=file_ids,
|
file_ids=file_ids,
|
||||||
include_file_url=True,
|
include_file_url=True,
|
||||||
include_service_keys_to_tags=True,
|
include_service_keys_to_tags=not minimal,
|
||||||
include_duration=True,
|
include_duration=not minimal,
|
||||||
include_size=True,
|
include_size=not minimal,
|
||||||
include_mime=True,
|
include_mime=not minimal,
|
||||||
)
|
)
|
||||||
metas = (
|
metas = (
|
||||||
payload.get("metadata",
|
payload.get("metadata",
|
||||||
@@ -810,10 +815,10 @@ class HydrusNetwork(Store):
|
|||||||
payload = client.fetch_file_metadata(
|
payload = client.fetch_file_metadata(
|
||||||
hashes=hashes,
|
hashes=hashes,
|
||||||
include_file_url=True,
|
include_file_url=True,
|
||||||
include_service_keys_to_tags=True,
|
include_service_keys_to_tags=not minimal,
|
||||||
include_duration=True,
|
include_duration=not minimal,
|
||||||
include_size=True,
|
include_size=not minimal,
|
||||||
include_mime=True,
|
include_mime=not minimal,
|
||||||
)
|
)
|
||||||
metas = (
|
metas = (
|
||||||
payload.get("metadata",
|
payload.get("metadata",
|
||||||
@@ -844,6 +849,7 @@ class HydrusNetwork(Store):
|
|||||||
fetch_limit=int(limit) if limit else 100,
|
fetch_limit=int(limit) if limit else 100,
|
||||||
scan_limit=scan_limit_override,
|
scan_limit=scan_limit_override,
|
||||||
needles=pattern_hints if pattern_hints else None,
|
needles=pattern_hints if pattern_hints else None,
|
||||||
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
elif namespace == "system":
|
elif namespace == "system":
|
||||||
normalized_system_predicate = pattern.strip()
|
normalized_system_predicate = pattern.strip()
|
||||||
@@ -857,6 +863,7 @@ class HydrusNetwork(Store):
|
|||||||
want_any=not bool(pattern_hints),
|
want_any=not bool(pattern_hints),
|
||||||
fetch_limit=fetch_limit,
|
fetch_limit=fetch_limit,
|
||||||
needles=pattern_hints if pattern_hints else None,
|
needles=pattern_hints if pattern_hints else None,
|
||||||
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse the query into tags
|
# Parse the query into tags
|
||||||
|
|||||||
@@ -3253,6 +3253,20 @@ def check_url_exists_in_storage(
|
|||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def _dedupe_needles(raw_needles: Sequence[str]) -> List[str]:
|
||||||
|
output: List[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for candidate in (raw_needles or []):
|
||||||
|
candidate_text = str(candidate or "").strip()
|
||||||
|
if not candidate_text:
|
||||||
|
continue
|
||||||
|
key = candidate_text.lower()
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
output.append(candidate_text)
|
||||||
|
return output
|
||||||
|
|
||||||
url_needles: Dict[str, List[str]] = {}
|
url_needles: Dict[str, List[str]] = {}
|
||||||
for u in unique_urls:
|
for u in unique_urls:
|
||||||
needles: List[str] = []
|
needles: List[str] = []
|
||||||
@@ -3301,7 +3315,8 @@ def check_url_exists_in_storage(
|
|||||||
normalized.append(norm_extra)
|
normalized.append(norm_extra)
|
||||||
|
|
||||||
combined = filtered + expanded + lowered + normalized
|
combined = filtered + expanded + lowered + normalized
|
||||||
url_needles[u] = combined if combined else [u]
|
deduped = _dedupe_needles(combined)
|
||||||
|
url_needles[u] = deduped if deduped else [u]
|
||||||
|
|
||||||
if in_pipeline:
|
if in_pipeline:
|
||||||
preflight_cache = _load_preflight_cache()
|
preflight_cache = _load_preflight_cache()
|
||||||
@@ -3341,7 +3356,10 @@ def check_url_exists_in_storage(
|
|||||||
if _timed_out("before backend scan"):
|
if _timed_out("before backend scan"):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
bulk_mode = len(unique_urls) > 1
|
# Use bulk mode only if we have a significant number of URLs.
|
||||||
|
# For small sets (1-3 URLs), individual targeted searches are faster
|
||||||
|
# and more accurate than scanning all files with URLs in the backend.
|
||||||
|
bulk_mode = len(unique_urls) > 3
|
||||||
|
|
||||||
def _build_bulk_patterns(needles_map: Dict[str, List[str]], max_per_url: int = 3, max_total: int = 240) -> List[str]:
|
def _build_bulk_patterns(needles_map: Dict[str, List[str]], max_per_url: int = 3, max_total: int = 240) -> List[str]:
|
||||||
patterns: List[str] = []
|
patterns: List[str] = []
|
||||||
@@ -3455,6 +3473,24 @@ def check_url_exists_in_storage(
|
|||||||
needles: Sequence[str],
|
needles: Sequence[str],
|
||||||
) -> Optional[Dict[str, Any]]:
|
) -> Optional[Dict[str, Any]]:
|
||||||
backend_hits: List[Dict[str, Any]] = []
|
backend_hits: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
# 1) Try exact match first (no wildcards).
|
||||||
|
# This is extremely fast for Hydrus and others that support direct URL lookup.
|
||||||
|
for needle in (needles or [])[:5]:
|
||||||
|
needle_stripped = str(needle or "").strip()
|
||||||
|
if not needle_stripped or not _httpish(needle_stripped):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
# Use 'url:' prefix to ensure storage layers (like Hydrus) recognize it as a URL lookup
|
||||||
|
query = f"url:{needle_stripped}"
|
||||||
|
backend_hits = backend.search(query, limit=1, minimal=True) or []
|
||||||
|
if backend_hits:
|
||||||
|
return _build_display_row_for_hit(backend_hits[0], backend_name, original_url)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 2) Fallback to wildcard substring search for normalized variants.
|
||||||
|
# This is for backends where the URL might be stored differently (partial match).
|
||||||
for needle in (needles or [])[:3]:
|
for needle in (needles or [])[:3]:
|
||||||
needle_text = str(needle or "").strip()
|
needle_text = str(needle or "").strip()
|
||||||
if not needle_text:
|
if not needle_text:
|
||||||
@@ -3462,7 +3498,7 @@ def check_url_exists_in_storage(
|
|||||||
search_needle = _normalize_url_for_search(needle_text) or needle_text
|
search_needle = _normalize_url_for_search(needle_text) or needle_text
|
||||||
query = f"url:*{search_needle}*"
|
query = f"url:*{search_needle}*"
|
||||||
try:
|
try:
|
||||||
backend_hits = backend.search(query, limit=1) or []
|
backend_hits = backend.search(query, limit=1, minimal=True) or []
|
||||||
if backend_hits:
|
if backend_hits:
|
||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -3540,61 +3576,6 @@ def check_url_exists_in_storage(
|
|||||||
if _timed_out("hydrus scan"):
|
if _timed_out("hydrus scan"):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if bulk_mode and bulk_patterns:
|
|
||||||
bulk_hits: Optional[List[Any]] = None
|
|
||||||
bulk_limit = min(2000, max(200, len(unique_urls) * 8))
|
|
||||||
try:
|
|
||||||
bulk_hits = backend.search(
|
|
||||||
"url:*",
|
|
||||||
limit=bulk_limit,
|
|
||||||
pattern_hint=bulk_patterns,
|
|
||||||
) or []
|
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
bulk_hits = backend.search("url:*", limit=bulk_limit) or []
|
|
||||||
except Exception:
|
|
||||||
bulk_hits = None
|
|
||||||
|
|
||||||
if bulk_hits is None:
|
|
||||||
debug("Bulk URL preflight: Hydrus bulk scan failed; skipping per-URL checks")
|
|
||||||
continue
|
|
||||||
|
|
||||||
for hit in bulk_hits:
|
|
||||||
if _timed_out("hydrus bulk scan"):
|
|
||||||
return True
|
|
||||||
if len(match_rows) >= max_rows:
|
|
||||||
break
|
|
||||||
url_values = _extract_urls_from_hit(hit, backend, allow_backend_lookup=False)
|
|
||||||
if not url_values:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
|
||||||
if _timed_out("hydrus bulk scan"):
|
|
||||||
return True
|
|
||||||
if len(match_rows) >= max_rows:
|
|
||||||
break
|
|
||||||
if (original_url, str(backend_name)) in seen_pairs:
|
|
||||||
continue
|
|
||||||
|
|
||||||
matched = False
|
|
||||||
for url_value in url_values:
|
|
||||||
for needle in (needles or []):
|
|
||||||
if _match_normalized_url(str(needle or ""), str(url_value or "")):
|
|
||||||
matched = True
|
|
||||||
break
|
|
||||||
if matched:
|
|
||||||
break
|
|
||||||
|
|
||||||
if not matched:
|
|
||||||
continue
|
|
||||||
|
|
||||||
seen_pairs.add((original_url, str(backend_name)))
|
|
||||||
matched_urls.add(original_url)
|
|
||||||
match_rows.append(
|
|
||||||
_build_display_row_for_hit(hit, str(backend_name), original_url)
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
for original_url, needles in url_needles.items():
|
for original_url, needles in url_needles.items():
|
||||||
if _timed_out("hydrus per-url scan"):
|
if _timed_out("hydrus per-url scan"):
|
||||||
return True
|
return True
|
||||||
@@ -3616,7 +3597,6 @@ def check_url_exists_in_storage(
|
|||||||
endpoint="/add_urls/get_url_files",
|
endpoint="/add_urls/get_url_files",
|
||||||
query={"url": needle},
|
query={"url": needle},
|
||||||
)
|
)
|
||||||
# Access internal client safely if possible, else skip check
|
|
||||||
if hasattr(client, "_perform_request"):
|
if hasattr(client, "_perform_request"):
|
||||||
response = client._perform_request(spec)
|
response = client._perform_request(spec)
|
||||||
raw_hashes = None
|
raw_hashes = None
|
||||||
@@ -3638,11 +3618,6 @@ def check_url_exists_in_storage(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
fallback_row = _search_backend_url_hits(backend, str(backend_name), original_url, needles)
|
|
||||||
if fallback_row:
|
|
||||||
seen_pairs.add((original_url, str(backend_name)))
|
|
||||||
matched_urls.add(original_url)
|
|
||||||
match_rows.append(fallback_row)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
seen_pairs.add((original_url, str(backend_name)))
|
seen_pairs.add((original_url, str(backend_name)))
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import sys
|
|||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
import re
|
import re
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from SYS import models
|
from SYS import models
|
||||||
from SYS import pipeline as ctx
|
from SYS import pipeline as ctx
|
||||||
@@ -14,6 +15,7 @@ from SYS.logger import log, debug, is_debug_enabled
|
|||||||
from SYS.pipeline_progress import PipelineProgress
|
from SYS.pipeline_progress import PipelineProgress
|
||||||
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
|
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
|
||||||
from Store import Store
|
from Store import Store
|
||||||
|
from API.HTTP import _download_direct_file
|
||||||
from . import _shared as sh
|
from . import _shared as sh
|
||||||
|
|
||||||
Cmdlet = sh.Cmdlet
|
Cmdlet = sh.Cmdlet
|
||||||
@@ -34,7 +36,7 @@ coerce_to_path = sh.coerce_to_path
|
|||||||
build_pipeline_preview = sh.build_pipeline_preview
|
build_pipeline_preview = sh.build_pipeline_preview
|
||||||
get_field = sh.get_field
|
get_field = sh.get_field
|
||||||
|
|
||||||
from SYS.utils import sha256_file, unique_path
|
from SYS.utils import sha256_file, unique_path, sanitize_filename
|
||||||
from SYS.metadata import write_metadata
|
from SYS.metadata import write_metadata
|
||||||
|
|
||||||
# Canonical supported filetypes for all stores/cmdlets
|
# Canonical supported filetypes for all stores/cmdlets
|
||||||
@@ -1079,6 +1081,62 @@ class Add_File(Cmdlet):
|
|||||||
pass
|
pass
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
@staticmethod
def _build_provider_filename(
    pipe_obj: models.PipeObject,
    fallback_hash: Optional[str] = None,
    source_url: Optional[str] = None,
) -> str:
    """Build a filesystem-safe filename for a provider download.

    The base name is the first non-blank candidate among ``pipe_obj.title``,
    ``pipe_obj.extra["name"|"title"]`` and ``pipe_obj.metadata["title"|"name"]``,
    falling back to the first 8 characters of ``fallback_hash`` and finally to
    ``"download"``. The extension is taken from ``metadata["ext"|"extension"]``,
    then ``extra["ext"]``, then the suffix of the path component of
    ``source_url``.

    Args:
        pipe_obj: Object whose ``title``, ``extra`` and ``metadata`` attributes
            are read via ``getattr`` (missing attributes are tolerated).
        fallback_hash: Hash used for the base name when no title is available.
        source_url: URL whose path suffix is the last-resort extension source.

    Returns:
        The sanitized filename, with the extension appended unless the name
        already ends with it (case-insensitive). Never empty.
    """
    title_candidates: List[str] = []

    title_value = getattr(pipe_obj, "title", "")
    if title_value:
        title_candidates.append(str(title_value))

    extra = getattr(pipe_obj, "extra", {})
    if isinstance(extra, dict):
        candid = extra.get("name") or extra.get("title")
        if candid:
            title_candidates.append(str(candid))

    metadata = getattr(pipe_obj, "metadata", {})
    if isinstance(metadata, dict):
        meta_name = metadata.get("title") or metadata.get("name")
        if meta_name:
            title_candidates.append(str(meta_name))

    # First candidate that is non-blank after stripping wins.
    text = next(
        (stripped for stripped in (c.strip() for c in title_candidates) if stripped),
        "",
    )
    if not text and fallback_hash:
        text = fallback_hash[:8]

    safe_name = sanitize_filename(text or "download")

    ext: Any = ""
    if isinstance(metadata, dict):
        ext = metadata.get("ext") or metadata.get("extension") or ""
    if not ext and isinstance(extra, dict):
        ext = extra.get("ext") or ""
    if not ext and source_url:
        try:
            ext = Path(urlparse(source_url).path).suffix
        except Exception:
            ext = ""

    # The extension comes from metadata or the URL and is appended after the
    # base name was sanitized, so normalize it separately: drop anything that
    # is not alphanumeric / "." / "_" / "-" (path separators, whitespace, ...)
    # and trim surrounding dots so exactly one leading dot is added below.
    ext_text = "".join(
        ch for ch in str(ext or "") if ch.isalnum() or ch in "._-"
    ).strip(".")
    if ext_text:
        ext_text = "." + ext_text
        if not safe_name.lower().endswith(ext_text.lower()):
            safe_name = f"{safe_name}{ext_text}"

    return safe_name or "download"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_backend_by_name(store: Any, backend_name: str) -> Optional[Any]:
|
def _resolve_backend_by_name(store: Any, backend_name: str) -> Optional[Any]:
|
||||||
if not store or not backend_name:
|
if not store or not backend_name:
|
||||||
@@ -1219,6 +1277,32 @@ class Add_File(Cmdlet):
|
|||||||
)
|
)
|
||||||
if dl_path and dl_path.exists():
|
if dl_path and dl_path.exists():
|
||||||
return dl_path, str(r_hash), tmp_dir
|
return dl_path, str(r_hash), tmp_dir
|
||||||
|
source_url = str(source).strip()
|
||||||
|
if source_url.lower().startswith(("http://", "https://")):
|
||||||
|
download_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
|
||||||
|
try:
|
||||||
|
filename = Add_File._build_provider_filename(
|
||||||
|
pipe_obj,
|
||||||
|
str(r_hash),
|
||||||
|
source_url,
|
||||||
|
)
|
||||||
|
downloaded = _download_direct_file(
|
||||||
|
source_url,
|
||||||
|
download_dir,
|
||||||
|
quiet=True,
|
||||||
|
suggested_filename=filename,
|
||||||
|
)
|
||||||
|
downloaded_path = downloaded.path
|
||||||
|
if downloaded_path and downloaded_path.exists():
|
||||||
|
pipe_obj.is_temp = True
|
||||||
|
pipe_obj.path = str(downloaded_path)
|
||||||
|
return downloaded_path, str(r_hash), download_dir
|
||||||
|
except Exception as exc:
|
||||||
|
debug(f"[add-file] Provider download failed: {exc}")
|
||||||
|
try:
|
||||||
|
shutil.rmtree(download_dir, ignore_errors=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -241,95 +241,32 @@ class Get_Url(Cmdlet):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_title_for_hash(backend: Any, file_hash: str, hit: Any = None) -> str:
|
def _extract_size_from_hit(hit: Any) -> int | None:
|
||||||
"""Best-effort title resolution for a found hash.
|
for key in ("size", "file_size", "filesize", "size_bytes"):
|
||||||
|
try:
|
||||||
Strategy:
|
val = get_field(hit, key)
|
||||||
- Use the hit's existing title/columns when present.
|
except Exception:
|
||||||
- Prefer backend.get_metadata(hash) when available (direct lookup).
|
val = None
|
||||||
- Fallback to backend.search('hash:<sha>', limit=1) and read title.
|
if val is None:
|
||||||
"""
|
continue
|
||||||
try:
|
if isinstance(val, (int, float)):
|
||||||
if hit is not None:
|
return int(val)
|
||||||
from_hit = Get_Url._extract_title_from_result(hit)
|
try:
|
||||||
if from_hit:
|
return int(val)
|
||||||
return from_hit
|
except Exception:
|
||||||
except Exception:
|
continue
|
||||||
pass
|
return None
|
||||||
|
|
||||||
try:
|
|
||||||
if hasattr(backend, "get_metadata"):
|
|
||||||
meta = backend.get_metadata(file_hash)
|
|
||||||
if isinstance(meta, dict):
|
|
||||||
t = meta.get("title")
|
|
||||||
if isinstance(t, str) and t.strip():
|
|
||||||
return t.strip()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
if hasattr(backend, "search"):
|
|
||||||
hits = backend.search(f"hash:{file_hash}", limit=1)
|
|
||||||
if isinstance(hits, list) and hits:
|
|
||||||
t2 = Get_Url._extract_title_from_result(hits[0])
|
|
||||||
if t2:
|
|
||||||
return t2
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return ""
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_size_ext_for_hash(backend: Any, file_hash: str, hit: Any = None) -> tuple[int | None, str]:
|
def _extract_ext_from_hit(hit: Any) -> str:
|
||||||
"""Best-effort (size, ext) resolution for a found hash."""
|
for key in ("ext", "extension"):
|
||||||
# First: see if the hit already includes these fields.
|
try:
|
||||||
try:
|
ext_val = get_field(hit, key)
|
||||||
size_val = get_field(hit, "size")
|
except Exception:
|
||||||
if size_val is None:
|
ext_val = None
|
||||||
size_val = get_field(hit, "file_size")
|
if isinstance(ext_val, str) and ext_val.strip():
|
||||||
if size_val is None:
|
return ext_val.strip().lstrip(".")
|
||||||
size_val = get_field(hit, "filesize")
|
return ""
|
||||||
if size_val is None:
|
|
||||||
size_val = get_field(hit, "size_bytes")
|
|
||||||
size_int = int(size_val) if isinstance(size_val, (int, float)) else None
|
|
||||||
except Exception:
|
|
||||||
size_int = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
ext_val = get_field(hit, "ext")
|
|
||||||
if ext_val is None:
|
|
||||||
ext_val = get_field(hit, "extension")
|
|
||||||
ext = str(ext_val).strip().lstrip(".") if isinstance(ext_val, str) else ""
|
|
||||||
except Exception:
|
|
||||||
ext = ""
|
|
||||||
|
|
||||||
if size_int is not None or ext:
|
|
||||||
return size_int, ext
|
|
||||||
|
|
||||||
# Next: backend.get_metadata(hash) when available.
|
|
||||||
try:
|
|
||||||
if hasattr(backend, "get_metadata"):
|
|
||||||
meta = backend.get_metadata(file_hash)
|
|
||||||
if isinstance(meta, dict):
|
|
||||||
size_val2 = meta.get("size")
|
|
||||||
if size_val2 is None:
|
|
||||||
size_val2 = meta.get("file_size")
|
|
||||||
if size_val2 is None:
|
|
||||||
size_val2 = meta.get("filesize")
|
|
||||||
if size_val2 is None:
|
|
||||||
size_val2 = meta.get("size_bytes")
|
|
||||||
if isinstance(size_val2, (int, float)):
|
|
||||||
size_int = int(size_val2)
|
|
||||||
|
|
||||||
ext_val2 = meta.get("ext")
|
|
||||||
if ext_val2 is None:
|
|
||||||
ext_val2 = meta.get("extension")
|
|
||||||
if isinstance(ext_val2, str) and ext_val2.strip():
|
|
||||||
ext = ext_val2.strip().lstrip(".")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return size_int, ext
|
|
||||||
|
|
||||||
def _search_urls_across_stores(self,
|
def _search_urls_across_stores(self,
|
||||||
pattern: str,
|
pattern: str,
|
||||||
@@ -360,9 +297,6 @@ class Get_Url(Cmdlet):
|
|||||||
try:
|
try:
|
||||||
backend = storage[store_name]
|
backend = storage[store_name]
|
||||||
|
|
||||||
title_cache: Dict[str, str] = {}
|
|
||||||
meta_cache: Dict[str, tuple[int | None, str]] = {}
|
|
||||||
|
|
||||||
# Search only URL-bearing records using the backend's URL search capability.
|
# Search only URL-bearing records using the backend's URL search capability.
|
||||||
# This avoids the expensive/incorrect "search('*')" scan.
|
# This avoids the expensive/incorrect "search('*')" scan.
|
||||||
try:
|
try:
|
||||||
@@ -431,22 +365,12 @@ class Get_Url(Cmdlet):
|
|||||||
search_limit,
|
search_limit,
|
||||||
store_name,
|
store_name,
|
||||||
pattern_hint=target_pattern,
|
pattern_hint=target_pattern,
|
||||||
|
minimal=True,
|
||||||
)
|
)
|
||||||
if search_results is None:
|
if search_results is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
search_results = search_results or []
|
search_results = search_results or []
|
||||||
if not search_results and target_pattern and not has_wildcards:
|
|
||||||
fallback_results = self._execute_search_with_timeout(
|
|
||||||
backend,
|
|
||||||
"url:*",
|
|
||||||
search_limit,
|
|
||||||
store_name,
|
|
||||||
pattern_hint=target_pattern,
|
|
||||||
)
|
|
||||||
if fallback_results is None:
|
|
||||||
continue
|
|
||||||
search_results = fallback_results or []
|
|
||||||
|
|
||||||
for hit in (search_results or []):
|
for hit in (search_results or []):
|
||||||
if len(items) >= MAX_RESULTS:
|
if len(items) >= MAX_RESULTS:
|
||||||
@@ -459,44 +383,9 @@ class Get_Url(Cmdlet):
|
|||||||
|
|
||||||
file_hash = str(file_hash)
|
file_hash = str(file_hash)
|
||||||
|
|
||||||
title = title_cache.get(file_hash, "")
|
title = self._extract_title_from_result(hit) or ""
|
||||||
if not title:
|
size = self._extract_size_from_hit(hit)
|
||||||
try:
|
ext = self._extract_ext_from_hit(hit)
|
||||||
title = (
|
|
||||||
get_field(hit, "title")
|
|
||||||
or get_field(hit, "name")
|
|
||||||
or get_field(hit, "file_title")
|
|
||||||
or ""
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
title = ""
|
|
||||||
if not title:
|
|
||||||
title = self._resolve_title_for_hash(backend, file_hash, hit)
|
|
||||||
title_cache[file_hash] = title
|
|
||||||
|
|
||||||
size, ext = meta_cache.get(file_hash, (None, ""))
|
|
||||||
if size is None and not ext:
|
|
||||||
try:
|
|
||||||
size = get_field(hit, "size")
|
|
||||||
if size is None:
|
|
||||||
size = get_field(hit, "size_bytes")
|
|
||||||
if size is None:
|
|
||||||
size = get_field(hit, "file_size")
|
|
||||||
if size is None:
|
|
||||||
size = get_field(hit, "filesize")
|
|
||||||
size = int(size) if isinstance(size, (int, float)) else None
|
|
||||||
except Exception:
|
|
||||||
size = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
ext = get_field(hit, "ext") or get_field(hit, "extension")
|
|
||||||
ext = str(ext).strip().lstrip(".") if isinstance(ext, str) else ""
|
|
||||||
except Exception:
|
|
||||||
ext = ""
|
|
||||||
|
|
||||||
if size is None and not ext:
|
|
||||||
size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
|
|
||||||
meta_cache[file_hash] = (size, ext)
|
|
||||||
|
|
||||||
urls = self._extract_urls_from_hit(hit)
|
urls = self._extract_urls_from_hit(hit)
|
||||||
if not urls:
|
if not urls:
|
||||||
@@ -505,6 +394,7 @@ class Get_Url(Cmdlet):
|
|||||||
except Exception:
|
except Exception:
|
||||||
urls = []
|
urls = []
|
||||||
|
|
||||||
|
hit_added = False
|
||||||
for url in (urls or []):
|
for url in (urls or []):
|
||||||
if len(items) >= MAX_RESULTS:
|
if len(items) >= MAX_RESULTS:
|
||||||
break
|
break
|
||||||
@@ -526,7 +416,9 @@ class Get_Url(Cmdlet):
|
|||||||
ext=str(ext or ""),
|
ext=str(ext or ""),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
found_stores.add(str(store_name))
|
hit_added = True
|
||||||
|
if hit_added:
|
||||||
|
found_stores.add(str(store_name))
|
||||||
if len(items) >= MAX_RESULTS:
|
if len(items) >= MAX_RESULTS:
|
||||||
break
|
break
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|||||||
Reference in New Issue
Block a user