dfdfsdd
This commit is contained in:
@@ -264,6 +264,170 @@ class HydrusNetwork(Store):
|
||||
|
||||
debug(f"Searching Hydrus for: {query}")
|
||||
|
||||
def _extract_urls(meta_obj: Any) -> list[str]:
    """Return the non-empty, whitespace-stripped URL strings found on a metadata entry.

    The keys ``"url"``, ``"urls"`` and ``"known_urls"`` are tried in that
    order, and the first key that yields at least one usable URL wins. A key
    whose value is missing, empty, or of an unexpected type no longer hides
    the URLs stored under a later key.

    Args:
        meta_obj: A single metadata entry. Anything that is not a ``dict``
            is treated as carrying no URLs.

    Returns:
        A list of stripped URL strings; empty when nothing usable is found.
    """
    if not isinstance(meta_obj, dict):
        return []

    def _clean(raw: Any) -> list[str]:
        # A lone string is treated as a one-element list so both shapes
        # share the same strip/filter path.
        if isinstance(raw, str):
            raw = [raw]
        if not isinstance(raw, (list, tuple)):
            return []
        stripped = (item.strip() for item in raw if isinstance(item, str))
        return [s for s in stripped if s]

    # NOTE(review): "known_urls" is the field name the Hydrus Client API
    # documents for file metadata; "url"/"urls" are presumably filled in by
    # the project's client wrapper - confirm against the wrapper.
    for key in ("url", "urls", "known_urls"):
        found = _clean(meta_obj.get(key))
        if found:
            return found
    return []
|
||||
|
||||
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
    """Best-effort URL search by scanning Hydrus metadata with include_file_url=True.

    Candidate file ids are gathered first, then their metadata is fetched in
    chunks and filtered locally on the URLs reported by ``_extract_urls``.

    Args:
        url_value: Substring to look for (case-insensitively) in each file's
            URLs. Ignored when ``want_any`` is true.
        want_any: When true, every file that has at least one URL matches.
        fetch_limit: Maximum number of metadata entries to return.

    Returns:
        Up to ``fetch_limit`` metadata dicts whose URLs match. Empty when the
        limit is not positive, when a substring search has no substring, or
        when no candidate files are found.

    Raises:
        Whatever ``client.search_files`` raises for the ``system:everything``
        fallback search; only the ``system:has url`` attempt and the per-chunk
        metadata fetches are guarded.
    """

    def _coerce_file_ids(raw_ids: Any) -> list[int]:
        # One coercion rule for both searches: plain ints, integral floats
        # and decimal strings. bool is excluded because it is an int
        # subclass, and isdecimal() (not isdigit()) is used so every
        # accepted string is something int() can actually parse.
        if not isinstance(raw_ids, list):
            return []
        ids: list[int] = []
        for value in raw_ids:
            if isinstance(value, bool):
                continue
            if isinstance(value, int):
                ids.append(value)
            elif isinstance(value, float) and value.is_integer():
                ids.append(int(value))
            elif isinstance(value, str) and value.strip().isdecimal():
                ids.append(int(value.strip()))
        return ids

    def _search_ids(tag: str) -> list[int]:
        # Ask only for file ids; hashes and counts are not needed here.
        response = client.search_files(
            tags=[tag],
            return_hashes=False,
            return_file_ids=True,
            return_file_count=False,
        )
        raw_ids = response.get("file_ids", []) if isinstance(response, dict) else []
        return _coerce_file_ids(raw_ids)

    needle = (url_value or "").strip().lower()

    # Nothing can match in these cases, so skip the searches and the
    # library-wide metadata scan altogether.
    if fetch_limit <= 0 or (not want_any and not needle):
        return []

    # First try a fast system predicate if Hydrus supports it.
    candidate_file_ids: list[int] = []
    if want_any:
        try:
            candidate_file_ids = _search_ids("system:has url")
        except Exception:
            # Deliberate best-effort: fall through to the full scan below.
            candidate_file_ids = []

    if not candidate_file_ids:
        # Fallback: scan from system:everything and filter by URL substring.
        candidate_file_ids = _search_ids("system:everything")

    if not candidate_file_ids:
        return []

    chunk_size = 200
    out: list[dict[str, Any]] = []

    for start in range(0, len(candidate_file_ids), chunk_size):
        if len(out) >= fetch_limit:
            break
        chunk = candidate_file_ids[start : start + chunk_size]
        try:
            payload = client.fetch_file_metadata(
                file_ids=chunk,
                include_file_url=True,
                include_service_keys_to_tags=True,
                include_duration=True,
                include_size=True,
                include_mime=True,
            )
        except Exception:
            # A failed chunk is skipped so the remaining chunks are still scanned.
            continue

        metas = payload.get("metadata", []) if isinstance(payload, dict) else []
        if not isinstance(metas, list):
            continue

        for meta in metas:
            if not isinstance(meta, dict):
                continue
            urls = _extract_urls(meta)
            if not urls:
                continue
            # needle is guaranteed non-empty here whenever want_any is false.
            if want_any or any(needle in u.lower() for u in urls):
                out.append(meta)
                if len(out) >= fetch_limit:
                    break

    return out
|
||||
|
||||
query_lower = query.lower().strip()
|
||||
|
||||
# Special case: url:* and url:<value>
|
||||
metadata_list: list[dict[str, Any]] | None = None
|
||||
if ":" in query_lower and not query_lower.startswith(":"):
|
||||
namespace, pattern = query_lower.split(":", 1)
|
||||
namespace = namespace.strip().lower()
|
||||
pattern = pattern.strip()
|
||||
if namespace == "url":
|
||||
if not pattern or pattern == "*":
|
||||
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
|
||||
else:
|
||||
# Fast-path: exact URL via /add_url/get_url_files when a full URL is provided.
|
||||
try:
|
||||
if pattern.startswith("http://") or pattern.startswith("https://"):
|
||||
from API.HydrusNetwork import HydrusRequestSpec
|
||||
|
||||
spec = HydrusRequestSpec(method="GET", endpoint="/add_url/get_url_files", query={"url": pattern})
|
||||
response = client._perform_request(spec) # type: ignore[attr-defined]
|
||||
hashes: list[str] = []
|
||||
file_ids: list[int] = []
|
||||
if isinstance(response, dict):
|
||||
raw_hashes = response.get("hashes") or response.get("file_hashes")
|
||||
if isinstance(raw_hashes, list):
|
||||
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
|
||||
raw_ids = response.get("file_ids")
|
||||
if isinstance(raw_ids, list):
|
||||
for item in raw_ids:
|
||||
try:
|
||||
file_ids.append(int(item))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
|
||||
if file_ids:
|
||||
payload = client.fetch_file_metadata(
|
||||
file_ids=file_ids,
|
||||
include_file_url=True,
|
||||
include_service_keys_to_tags=True,
|
||||
include_duration=True,
|
||||
include_size=True,
|
||||
include_mime=True,
|
||||
)
|
||||
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
|
||||
if isinstance(metas, list):
|
||||
metadata_list = [m for m in metas if isinstance(m, dict)]
|
||||
elif hashes:
|
||||
payload = client.fetch_file_metadata(
|
||||
hashes=hashes,
|
||||
include_file_url=True,
|
||||
include_service_keys_to_tags=True,
|
||||
include_duration=True,
|
||||
include_size=True,
|
||||
include_mime=True,
|
||||
)
|
||||
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
|
||||
if isinstance(metas, list):
|
||||
metadata_list = [m for m in metas if isinstance(m, dict)]
|
||||
except Exception:
|
||||
metadata_list = None
|
||||
|
||||
# Fallback: substring scan
|
||||
if metadata_list is None:
|
||||
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
|
||||
|
||||
# Parse the query into tags
|
||||
# Handle both simple tags and complex queries
|
||||
# "*" means "match all" - use system:everything tag in Hydrus
|
||||
@@ -271,7 +435,6 @@ class HydrusNetwork(Store):
|
||||
# Use system:everything to match all files in Hydrus
|
||||
tags = ["system:everything"]
|
||||
else:
|
||||
query_lower = query.lower().strip()
|
||||
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
|
||||
# If query has explicit namespace, use it as a tag search
|
||||
if ':' not in query_lower:
|
||||
@@ -286,30 +449,36 @@ class HydrusNetwork(Store):
|
||||
debug(f"Found 0 result(s)")
|
||||
return []
|
||||
|
||||
# Search files with the tags
|
||||
search_result = client.search_files(
|
||||
tags=tags,
|
||||
return_hashes=True,
|
||||
return_file_ids=True
|
||||
)
|
||||
|
||||
# Extract file IDs from search result
|
||||
file_ids = search_result.get("file_ids", [])
|
||||
hashes = search_result.get("hashes", [])
|
||||
|
||||
if not file_ids and not hashes:
|
||||
debug(f"Found 0 result(s)")
|
||||
return []
|
||||
|
||||
# Fetch metadata for the found files
|
||||
# Search files with the tags (unless url: search already produced metadata)
|
||||
results = []
|
||||
query_lower = query.lower().strip()
|
||||
# Split by comma or space for AND logic
|
||||
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
|
||||
|
||||
if file_ids:
|
||||
metadata = client.fetch_file_metadata(file_ids=file_ids)
|
||||
metadata_list = metadata.get("metadata", [])
|
||||
|
||||
if metadata_list is None:
|
||||
search_result = client.search_files(
|
||||
tags=tags,
|
||||
return_hashes=True,
|
||||
return_file_ids=True
|
||||
)
|
||||
|
||||
file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
|
||||
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
|
||||
|
||||
if not file_ids and not hashes:
|
||||
debug(f"Found 0 result(s)")
|
||||
return []
|
||||
|
||||
if file_ids:
|
||||
metadata = client.fetch_file_metadata(file_ids=file_ids)
|
||||
metadata_list = metadata.get("metadata", [])
|
||||
elif hashes:
|
||||
metadata = client.fetch_file_metadata(hashes=hashes)
|
||||
metadata_list = metadata.get("metadata", [])
|
||||
else:
|
||||
metadata_list = []
|
||||
|
||||
if not isinstance(metadata_list, list):
|
||||
metadata_list = []
|
||||
|
||||
for meta in metadata_list:
|
||||
if len(results) >= limit:
|
||||
|
||||
Reference in New Issue
Block a user