AST
This commit is contained in:
159
helper/query_parser.py
Normal file
159
helper/query_parser.py
Normal file
@@ -0,0 +1,159 @@
"""Dynamic query parser for filtering and field extraction.

Supports query syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma"
- year:2010
- isbn:0557677203 author:"Albert Pike"
- Mixed with free text: "Morals" isbn:0557677203

This allows flexible query strings that can be parsed by any search provider
to extract specific fields for filtering and searching.
"""

from typing import Dict, List, Tuple, Optional, Any
|
||||
import re
|
||||
|
||||
|
||||
# Matches ``field:value`` or ``field:"quoted value"``.
#   group(1) - field name (one or more word characters)
#   group(2) - quoted value with the surrounding quotes removed, or None
#   group(3) - unquoted value (a run of non-whitespace characters), or None
_FIELD_PATTERN = re.compile(r'(\w+):(?:"([^"]*)"|(\S+))')


def parse_query(query: str) -> Dict[str, Any]:
    """Parse a query string into field:value pairs and free text.

    Field names are lower-cased. When the same field appears more than
    once, the last occurrence wins. Everything that is not part of a
    ``field:value`` token is collected, in order, as free text.

    Args:
        query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'

    Returns:
        Dictionary with:
        - 'fields': Dict[field_name, field_value] for structured fields
        - 'text': str with remaining free text (segments joined by a space)
        - 'raw': the query exactly as it was passed in (not stripped)
    """
    result: Dict[str, Any] = {
        'fields': {},
        'text': '',
        'raw': query,
    }

    # Empty, None, or whitespace-only input has nothing to parse.
    if not query or not query.strip():
        return result

    query = query.strip()
    fields: Dict[str, str] = result['fields']
    free_parts: List[str] = []
    pos = 0  # end offset of the previously consumed field token

    for match in _FIELD_PATTERN.finditer(query):
        # Whatever sits between the previous token and this one is free text.
        gap = query[pos:match.start()].strip()
        if gap:
            free_parts.append(gap)

        name, quoted, bare = match.groups()
        # An empty quoted value ("") is a valid value, so test against None
        # rather than truthiness when choosing between the two alternatives.
        fields[name.lower()] = quoted if quoted is not None else bare
        pos = match.end()

    # Free text after the last field token (or the whole query if none matched).
    tail = query[pos:].strip()
    if tail:
        free_parts.append(tail)

    result['text'] = ' '.join(free_parts)
    return result
|
||||
|
||||
|
||||
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
    """Get a field value from parsed query, with optional default.

    The lookup lower-cases ``field_name`` before consulting the 'fields'
    mapping. A ``parsed_query`` without a 'fields' key is treated as having
    no fields.

    Args:
        parsed_query: Result from parse_query()
        field_name: Field name to look up (case-insensitive)
        default: Default value if field not found

    Returns:
        Field value or default
    """
    fields = parsed_query.get('fields', {})
    return fields.get(field_name.lower(), default)
|
||||
|
||||
|
||||
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
    """Check if a field exists in parsed query.

    ``field_name`` is lower-cased before the membership test. A
    ``parsed_query`` without a 'fields' key is treated as having no fields.

    Args:
        parsed_query: Result from parse_query()
        field_name: Field name to check (case-insensitive)

    Returns:
        True if field exists
    """
    fields = parsed_query.get('fields', {})
    return field_name.lower() in fields
|
||||
|
||||
|
||||
def get_free_text(parsed_query: Dict[str, Any]) -> str:
    """Get the free text portion of a parsed query.

    Args:
        parsed_query: Result from parse_query()

    Returns:
        Free text, or an empty string when the 'text' key is absent
    """
    return parsed_query.get('text', '')
|
||||
|
||||
|
||||
def build_query_for_provider(
    parsed_query: Dict[str, Any],
    provider: str,
    extraction_map: Optional[Dict[str, str]] = None
) -> Tuple[str, Dict[str, str]]:
    """Build a search query and filters dict for a specific provider.

    Different providers have different search syntax. This function
    extracts the appropriate fields for each provider.

    Args:
        parsed_query: Result from parse_query()
        provider: Provider name ('libgen', 'openlibrary', 'soulseek').
            Currently not used by this function; the result depends only
            on ``parsed_query`` and ``extraction_map``.
        extraction_map: Optional mapping of field names to provider-specific names
            e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}.
            Field names are matched case-insensitively. When omitted or
            empty, no fields are extracted.

    Returns:
        Tuple of (search_query: str, extracted_fields: Dict[field, value]).
        The search query is the free text of ``parsed_query``; only fields
        present in ``parsed_query`` appear in the extracted dict.
    """
    extraction_map = extraction_map or {}
    fields = parsed_query.get('fields', {})
    free_text = parsed_query.get('text', '')

    # Extract fields based on map. Parsed field names are stored lower-case,
    # so normalise the map's key before looking it up (single lookup per key).
    extracted: Dict[str, str] = {}
    for field_name, provider_key in extraction_map.items():
        key = field_name.lower()
        if key in fields:
            extracted[provider_key] = fields[key]

    # If provider-specific extraction needed, providers can implement it
    # For now, return the free text as query
    return free_text, extracted
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: parse a handful of representative queries and print
    # the extracted fields and leftover free text for visual inspection.
    test_queries = [
        'isbn:0557677203',
        'isbn:0557677203 author:"Albert Pike"',
        'Morals and Dogma isbn:0557677203',
        'title:"Morals and Dogma" author:"Albert Pike" year:2010',
        'search term without fields',
        'author:"John Smith" title:"A Book"',
    ]

    for query in test_queries:
        print(f"\nQuery: {query}")
        parsed = parse_query(query)
        print(f" Fields: {parsed['fields']}")
        print(f" Text: {parsed['text']}")
|
||||
Reference in New Issue
Block a user