"""Dynamic query parser for filtering and field extraction. Supports query syntax like: - isbn:0557677203 - author:"Albert Pike" - title:"Morals and Dogma" - year:2010 - isbn:0557677203 author:"Albert Pike" - Mixed with free text: "Morals" isbn:0557677203 This allows flexible query strings that can be parsed by any search provider to extract specific fields for filtering and searching. """ from typing import Dict, List, Tuple, Optional, Any import re def parse_query(query: str) -> Dict[str, Any]: """Parse a query string into field:value pairs and free text. Args: query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals' Returns: Dictionary with: - 'fields': Dict[field_name, field_value] for structured fields - 'text': str with remaining free text - 'raw': str original query """ result = { 'fields': {}, 'text': '', 'raw': query, } if not query or not query.strip(): return result query = query.strip() remaining_parts = [] # Pattern to match: field:value or field:"quoted value" # Matches: word: followed by either quoted string or unquoted word pattern = r'(\w+):(?:"([^"]*)"|(\S+))' pos = 0 for match in re.finditer(pattern, query): # Add any text before this match if match.start() > pos: before_text = query[pos:match.start()].strip() if before_text: remaining_parts.append(before_text) field_name = match.group(1).lower() field_value = match.group(2) if match.group(2) is not None else match.group(3) result['fields'][field_name] = field_value pos = match.end() # Add any remaining text after last match if pos < len(query): remaining_text = query[pos:].strip() if remaining_text: remaining_parts.append(remaining_text) result['text'] = ' '.join(remaining_parts) return result def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]: """Get a field value from parsed query, with optional default. Args: parsed_query: Result from parse_query() field_name: Field name to look up (case-insensitive) default: Default value if field not found Returns: Field value or default """ return parsed_query.get('fields', {}).get(field_name.lower(), default) def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool: """Check if a field exists in parsed query. Args: parsed_query: Result from parse_query() field_name: Field name to check (case-insensitive) Returns: True if field exists """ return field_name.lower() in parsed_query.get('fields', {}) def get_free_text(parsed_query: Dict[str, Any]) -> str: """Get the free text portion of a parsed query. Args: parsed_query: Result from parse_query() Returns: Free text or empty string """ return parsed_query.get('text', '') def build_query_for_provider( parsed_query: Dict[str, Any], provider: str, extraction_map: Optional[Dict[str, str]] = None ) -> Tuple[str, Dict[str, str]]: """Build a search query and filters dict for a specific provider. Different providers have different search syntax. This function extracts the appropriate fields for each provider. Args: parsed_query: Result from parse_query() provider: Provider name ('libgen', 'openlibrary', 'soulseek') extraction_map: Optional mapping of field names to provider-specific names e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'} Returns: Tuple of (search_query: str, extracted_fields: Dict[field, value]) """ extraction_map = extraction_map or {} extracted = {} free_text = get_free_text(parsed_query) # Extract fields based on map for field_name, provider_key in extraction_map.items(): if has_field(parsed_query, field_name): extracted[provider_key] = get_field(parsed_query, field_name) # If provider-specific extraction needed, providers can implement it # For now, return the free text as query return free_text, extracted if __name__ == '__main__': # Test cases test_queries = [ 'isbn:0557677203', 'isbn:0557677203 author:"Albert Pike"', 'Morals and Dogma isbn:0557677203', 'title:"Morals and Dogma" author:"Albert Pike" year:2010', 'search term without fields', 'author:"John Smith" title:"A Book"', ] for query in test_queries: print(f"\nQuery: {query}") parsed = parse_query(query) print(f" Fields: {parsed['fields']}") print(f" Text: {parsed['text']}")