sd
@@ -567,6 +567,25 @@ class HydrusNetwork:
             "batched": results
         }
 
+    def get_url_info(self, url: str) -> dict[str, Any]:
+        """Get information about a URL.
+
+        Hydrus Client API: GET /add_urls/get_url_info
+        Docs: https://hydrusnetwork.github.io/hydrus/developer_api.html#add_urls_get_url_info
+        """
+        url = str(url or "").strip()
+        if not url:
+            raise ValueError("url must not be empty")
+
+        spec = HydrusRequestSpec(
+            method="GET",
+            endpoint="/add_urls/get_url_info",
+            query={
+                "url": url
+            },
+        )
+        return cast(dict[str, Any], self._perform_request(spec))
+
     def delete_url(self,
                    file_hashes: Union[str,
                                       Iterable[str]],
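A minimal call sketch for the new client method, assuming an already-configured HydrusNetwork instance (construction details are outside this diff):

    client = HydrusNetwork(...)  # configured elsewhere with endpoint + access key
    info = client.get_url_info("https://example.com/post/12345")
    # Hydrus reports the canonical form under the British spelling
    # "normalised_url"; the cmdlet hunk further down checks both spellings.
    print(info.get("normalised_url"))
    client.get_url_info("")  # raises ValueError("url must not be empty")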
@@ -1476,6 +1476,36 @@ class HydrusNetwork(Store):
             debug(f"{self._log_prefix()} get_url failed: {exc}")
             return []
 
+    def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
+        """Return Hydrus URL info for a single URL (Hydrus-only helper).
+
+        Uses: GET /add_urls/get_url_info
+        """
+        try:
+            client = self._client
+            if client is None:
+                return None
+            u = str(url or "").strip()
+            if not u:
+                return None
+            try:
+                return client.get_url_info(u)  # type: ignore[attr-defined]
+            except Exception:
+                from API.HydrusNetwork import HydrusRequestSpec
+
+                spec = HydrusRequestSpec(
+                    method="GET",
+                    endpoint="/add_urls/get_url_info",
+                    query={
+                        "url": u
+                    },
+                )
+                response = client._perform_request(spec)  # type: ignore[attr-defined]
+                return response if isinstance(response, dict) else None
+        except Exception as exc:
+            debug(f"{self._log_prefix()} get_url_info failed: {exc}")
+            return None
+
     def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
         """Associate one or more url with a Hydrus file."""
         try:
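The store-level wrapper is deliberately non-throwing: a missing client, a blank URL, or a failed request all collapse to None, with errors surfacing only through debug logging. A sketch of the lookup order it encodes (store here stands for any configured HydrusNetwork store instance, an assumption not shown in this diff):

    info = store.get_url_info(url)
    # 1. client.get_url_info(url)       -- the typed client method from the hunk above
    # 2. client._perform_request(spec)  -- raw HydrusRequestSpec fallback
    # 3. None                           -- anything else failed (logged via debug)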
@@ -16,6 +16,7 @@ Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
     sh.normalize_hash,
 )
 from SYS.logger import log
+from SYS.result_table import ResultTable
 from Store import Store
 from SYS import pipeline as ctx
 
@@ -25,6 +26,7 @@ class UrlItem:
     url: str
     hash: str
     store: str
+    title: str = ""
 
 
 class Get_Url(Cmdlet):
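Because the new title field carries a default, existing UrlItem(url=..., hash=..., store=...) call sites keep working unchanged; only the search path below populates it. Illustrative values:

    item = UrlItem(url="https://example.com/a", hash="ab12cd34", store="hydrus")
    item.title  # -> "" until _resolve_title_for_hash() fills it in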
@@ -80,6 +82,107 @@ class Get_Url(Cmdlet):
         # Use fnmatch for wildcard matching (* and ?)
         return fnmatch(normalized_url, normalized_pattern)
 
+    @staticmethod
+    def _extract_first_url(value: Any) -> Optional[str]:
+        if isinstance(value, str):
+            v = value.strip()
+            return v or None
+        if isinstance(value, (list, tuple)):
+            for item in value:
+                if isinstance(item, str) and item.strip():
+                    return item.strip()
+        return None
+
+    @staticmethod
+    def _extract_url_from_result(result: Any) -> Optional[str]:
+        # Prefer explicit url field.
+        u = Get_Url._extract_first_url(get_field(result, "url"))
+        if u:
+            return u
+
+        # Fall back to ResultTable-style columns list.
+        cols = None
+        if isinstance(result, dict):
+            cols = result.get("columns")
+        else:
+            cols = getattr(result, "columns", None)
+        if isinstance(cols, list):
+            for pair in cols:
+                try:
+                    if isinstance(pair, (list, tuple)) and len(pair) == 2:
+                        k, v = pair
+                        if str(k or "").strip().lower() in {"url", "urls"}:
+                            u2 = Get_Url._extract_first_url(v)
+                            if u2:
+                                return u2
+                except Exception:
+                    continue
+        return None
+
+    @staticmethod
+    def _extract_title_from_result(result: Any) -> Optional[str]:
+        # Prefer explicit title field.
+        t = get_field(result, "title")
+        if isinstance(t, str) and t.strip():
+            return t.strip()
+
+        # Fall back to ResultTable-style columns list.
+        cols = None
+        if isinstance(result, dict):
+            cols = result.get("columns")
+        else:
+            cols = getattr(result, "columns", None)
+        if isinstance(cols, list):
+            for pair in cols:
+                try:
+                    if isinstance(pair, (list, tuple)) and len(pair) == 2:
+                        k, v = pair
+                        if str(k or "").strip().lower() in {"title", "name"}:
+                            if isinstance(v, str) and v.strip():
+                                return v.strip()
+                except Exception:
+                    continue
+        return None
+
+    @staticmethod
+    def _resolve_title_for_hash(backend: Any, file_hash: str, hit: Any = None) -> str:
+        """Best-effort title resolution for a found hash.
+
+        Strategy:
+        - Use the hit's existing title/columns when present.
+        - Prefer backend.get_metadata(hash) when available (direct lookup).
+        - Fall back to backend.search('hash:<sha>', limit=1) and read the title.
+        """
+        try:
+            if hit is not None:
+                from_hit = Get_Url._extract_title_from_result(hit)
+                if from_hit:
+                    return from_hit
+        except Exception:
+            pass
+
+        try:
+            if hasattr(backend, "get_metadata"):
+                meta = backend.get_metadata(file_hash)
+                if isinstance(meta, dict):
+                    t = meta.get("title")
+                    if isinstance(t, str) and t.strip():
+                        return t.strip()
+        except Exception:
+            pass
+
+        try:
+            if hasattr(backend, "search"):
+                hits = backend.search(f"hash:{file_hash}", limit=1)
+                if isinstance(hits, list) and hits:
+                    t2 = Get_Url._extract_title_from_result(hits[0])
+                    if t2:
+                        return t2
+        except Exception:
+            pass
 
+        return ""
+
     def _search_urls_across_stores(self,
                                    pattern: str,
                                    config: Dict[str,
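Both extractors accept either a mapping with explicit url/title fields or a ResultTable-style row whose columns attribute is a list of (key, value) pairs, with case-insensitive key matching. Illustrative inputs (hypothetical data; this also assumes get_field reads plain dict keys):

    Get_Url._extract_url_from_result({"url": "  https://example.com/a  "})
    # -> "https://example.com/a" (explicit field wins; whitespace stripped)

    row = {"columns": [("Title", "My Post"), ("Url", ["https://example.com/b"])]}
    Get_Url._extract_url_from_result(row)    # -> "https://example.com/b"
    Get_Url._extract_title_from_result(row)  # -> "My Post"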
@@ -106,39 +209,70 @@ class Get_Url(Cmdlet):
             try:
                 backend = storage[store_name]
 
-                # Try to search files in this backend
-                # For now, we'll iterate through known files (this is a limitation)
-                # Each backend should ideally support get_all_files() or similar
-                # For now, we use search with a broad query to find candidates
-                try:
-                    # Try to get files via search (backend-specific)
-                    search_results = backend.search("*", limit=1000)
-                    if search_results:
-                        for result in search_results:
-                            file_hash = result.get("hash"
-                                                   ) or result.get("file_hash")
-                            if not file_hash:
-                                continue
-
-                            try:
-                                urls = backend.get_url(file_hash)
-                                if urls:
-                                    for url in urls:
-                                        if self._match_url_pattern(str(url),
-                                                                   pattern):
-                                            items.append(
-                                                UrlItem(
-                                                    url=str(url),
-                                                    hash=file_hash,
-                                                    store=store_name,
-                                                )
-                                            )
-                                            found_stores.add(store_name)
-                            except Exception:
-                                pass
-                except Exception:
-                    # Backend might not support search; skip
-                    pass
+                title_cache: Dict[str, str] = {}
+
+                # Search only URL-bearing records using the backend's URL search capability.
+                # This avoids the expensive/incorrect "search('*')" scan.
+                try:
+                    raw_pattern = str(pattern or "").strip()
+                    has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))
+
+                    # If this is a Hydrus backend and the pattern is a single URL,
+                    # normalize it through the official API.
+                    normalized_url = None
+                    if not has_wildcards and hasattr(backend, "get_url_info"):
+                        try:
+                            info = backend.get_url_info(raw_pattern)  # type: ignore[attr-defined]
+                            if isinstance(info, dict):
+                                norm = info.get("normalised_url") or info.get("normalized_url")
+                                if isinstance(norm, str) and norm.strip():
+                                    normalized_url = norm.strip()
+                        except Exception:
+                            normalized_url = None
+
+                    search_query = "url:*" if has_wildcards else f"url:{normalized_url or raw_pattern}"
+                    try:
+                        search_results = backend.search(search_query, limit=1000)
+                    except Exception:
+                        search_results = []
+
+                    for hit in (search_results or []):
+                        file_hash = None
+                        if isinstance(hit, dict):
+                            file_hash = hit.get("hash") or hit.get("file_hash")
+                        if not file_hash:
+                            continue
+
+                        file_hash = str(file_hash)
+
+                        title = title_cache.get(file_hash, "")
+                        if not title:
+                            title = self._resolve_title_for_hash(backend, file_hash, hit)
+                            title_cache[file_hash] = title
+
+                        try:
+                            urls = backend.get_url(file_hash)
+                        except Exception:
+                            urls = []
+
+                        for url in (urls or []):
+                            if not self._match_url_pattern(str(url), raw_pattern):
+                                continue
+                            items.append(
+                                UrlItem(
+                                    url=str(url),
+                                    hash=str(file_hash),
+                                    store=str(store_name),
+                                    title=str(title or ""),
+                                )
+                            )
+                            found_stores.add(str(store_name))
+                except Exception as exc:
+                    debug(
+                        f"Error searching store '{store_name}': {exc}",
+                        file=sys.stderr
+                    )
+                    continue
 
             except KeyError:
                 continue
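The replacement search reduces to a two-branch query plan; condensed (illustrative values):

    raw_pattern = "*example.com*"
    has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))        # True
    search_query = "url:*" if has_wildcards else f"url:{raw_pattern}"  # "url:*"
    # Wildcards: pull every URL-bearing record, then filter locally with
    # _match_url_pattern() (fnmatch). Exact URL: ask get_url_info() for the
    # normalised form first and search "url:<normalised-or-raw>" directly.

Per-hash titles are memoized in title_cache, so a file that matches several URLs triggers at most one metadata lookup.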
@@ -162,6 +296,10 @@ class Get_Url(Cmdlet):
         # Check if user provided a URL pattern to search for
         search_pattern = parsed.get("url")
 
+        # Allow piping a URL row (or any result with a url field/column) into get-url.
+        if not search_pattern:
+            search_pattern = self._extract_url_from_result(result)
+
         if search_pattern:
             # URL search mode: find all files with matching URLs across stores
             items, stores_searched = self._search_urls_across_stores(search_pattern, config)
@@ -170,9 +308,6 @@ class Get_Url(Cmdlet):
                 log(f"No urls matching pattern: {search_pattern}", file=sys.stderr)
                 return 1
 
-            # Create result table
-            from SYS.result_table import ResultTable
-
             table = (
                 ResultTable(
                     "URL Search Results",
@@ -181,28 +316,12 @@ class Get_Url(Cmdlet):
             )
             table.set_source_command("get-url", ["-url", search_pattern])
 
-            # Group by store for display
-            by_store: Dict[str,
-                           List[UrlItem]] = {}
             for item in items:
-                if item.store not in by_store:
-                    by_store[item.store] = []
-                by_store[item.store].append(item)
-
-            # Add rows grouped by store
-            for store_name in sorted(by_store.keys()):
-                store_items = by_store[store_name]
-                for idx, item in enumerate(store_items):
-                    row = table.add_row()
-                    if idx == 0:
-                        row.add_column("Store", store_name)
-                    else:
-                        row.add_column("Store", "")
-                    row.add_column("Url", item.url)
-                    # Normalize for display
-                    normalized = self._normalize_url_for_search(item.url)
-                    row.add_column("Hash", item.hash[:16])  # Show first 16 chars
-                    ctx.emit(item)
+                row = table.add_row()
+                row.add_column("Title", item.title)
+                row.add_column("Url", item.url)
+                row.add_column("Store", item.store)
+                ctx.emit(item)
 
             ctx.set_last_result_table(table if items else None, items, subject=result)
             log(
@@ -243,8 +362,6 @@ class Get_Url(Cmdlet):
 
             urls = backend.get_url(file_hash)
 
-            from SYS.result_table import ResultTable
-
             title = str(get_field(result, "title") or "").strip()
             table_title = "Title"
             if title: