From 0c800398a2e8df08cbbac04d94d4b332ab938b98 Mon Sep 17 00:00:00 2001 From: Ryo Okubo Date: Sat, 1 Feb 2025 09:43:36 +0900 Subject: [PATCH] Enhance Advanced Search documentation and join attrs implementation - Update advanced_search.md with detailed explanation of Join Attrs behavior - Add implementation notes for pagination, performance, and result count considerations - Refactor AdvancedSearchAPI with improved type hints and code comments - Clarify join attribute search method behavior and potential query implications --- docs/content/advanced/advanced_search.md | 55 ++++++++++++++++++------ entry/api_v2/views.py | 42 ++++++++++++------ 2 files changed, 72 insertions(+), 25 deletions(-) diff --git a/docs/content/advanced/advanced_search.md b/docs/content/advanced/advanced_search.md index ac1b3ecf5..ce4487416 100644 --- a/docs/content/advanced/advanced_search.md +++ b/docs/content/advanced/advanced_search.md @@ -31,17 +31,48 @@ Advanced Search is a powerful feature that allows you to search across multiple ### Advanced Features -- **Search Chain** - - Follow relationships between entries - - Search through referenced objects - - Chain multiple searches to traverse complex relationships - - Results include both direct matches and related entries - -- **Export Functionality** - - Export search results to various formats - - Asynchronous processing for large result sets - - Progress tracking for export tasks - - Download exported files when ready +#### Join Attrs + +Join Attrs enables relationship traversal in search results. Key points: + +- **Implementation** + - Sequential processing: root -> join targets + - Each join triggers new Elasticsearch query + - Supports OBJECT and ARRAY type references + +- **Critical Considerations** + 1. 
**Pagination Behavior** + ```python + # Example: Request 100 items + root_results = search(limit=100) # Returns 100 root items + joined_results = join_and_filter() # May return 0-100 items + next_page_starts_at = 101 # Regardless of joined result size + ``` + - Pagination applies to root level only + - Join/filter operations may reduce result size + - Each page may return fewer items than requested + + 2. **Performance Impact** + - N+1 query pattern with multiple joins + - No optimization for deep joins with filters + + 3. **Result Count Accuracy** + - Total count represents root level matches only + - Actual result count may be lower after joins/filters + - Cannot predict exact total after joins without full scan + +#### Search Chain +- Follow relationships between entries +- Search through referenced objects +- Chain multiple searches to traverse complex relationships +- Results include both direct matches and related entries + +#### Export Functionality + +- Export search results to various formats +- Asynchronous processing for large result sets +- Progress tracking for export tasks +- Download exported files when ready ## Access Methods @@ -87,7 +118,6 @@ Access Advanced Search programmatically through REST endpoints: - Leverage search chains for complex relationship queries - Monitor export task progress for large result sets - Consider pagination for large result sets in API usage - ## For Developers ### Architecture Overview @@ -173,3 +203,4 @@ Access Advanced Search programmatically through REST endpoints: - Integration tests for API endpoints - Performance tests for search operations - ACL verification tests + diff --git a/entry/api_v2/views.py b/entry/api_v2/views.py index aea58a288..900f5cb3e 100644 --- a/entry/api_v2/views.py +++ b/entry/api_v2/views.py @@ -240,6 +240,11 @@ class AdvancedSearchAPI(generics.GenericAPIView): """ NOTE for now it's just copied from /api/v1/entry/search, but it should be rewritten with DRF components. 
+ + Join Attrs implementation notes: + - Pagination is applied at root level first, then join & filter operations + - This may result in fewer items than the requested limit + - Each join triggers a new ES query (N+1 pattern) """ @extend_schema( @@ -275,8 +280,18 @@ def _get_joined_resp( prev_results: list[AdvancedSearchResultRecord], join_attr: AdvancedSearchJoinAttrInfo ) -> tuple[bool, AdvancedSearchResults]: """ - This is a helper method for join_attrs that will get specified attr values - that prev_result's ones refer to. + Process the join operation for a single attribute. + + Flow: + 1. Get related entities from prev_results + 2. Extract referral IDs and names + 3. Execute a new ES query for joined entities + 4. Apply filters if specified + + Note: + - Each call triggers a new ES query + - Results may be reduced by join filters + - Pagination from root level may lead to incomplete results """ entities = Entity.objects.filter( id__in=[result.entity["id"] for result in prev_results] @@ -364,21 +379,20 @@ def _get_joined_resp( # === End of Function: _get_joined_resp() === - def _get_ref_id_from_es_result(attrinfo): - if attrinfo["type"] == AttrType.OBJECT: - if attrinfo.get("value") is not None: + def _get_ref_id_from_es_result(attrinfo) -> list[int | None]: + match attrinfo["type"]: + case AttrType.OBJECT if attrinfo.get("value") is not None: return [attrinfo["value"].get("id")] - if attrinfo["type"] == AttrType.NAMED_OBJECT: - if attrinfo.get("value") is not None: + case AttrType.NAMED_OBJECT if attrinfo.get("value") is not None: [ref_info] = attrinfo["value"].values() return [ref_info.get("id")] - if attrinfo["type"] == AttrType.ARRAY_OBJECT: - return [x.get("id") for x in attrinfo["value"]] + case AttrType.ARRAY_OBJECT: + return [x.get("id") for x in attrinfo["value"]] - if attrinfo["type"] == AttrType.ARRAY_NAMED_OBJECT: - return sum([[y["id"] for y in x.values()] for x in attrinfo["value"]], []) + case AttrType.ARRAY_NAMED_OBJECT: + return sum([[y["id"] for y in 
x.values()] for x in attrinfo["value"]], []) return [] @@ -443,6 +457,8 @@ def _get_ref_id_from_es_result(attrinfo): total_count = deepcopy(resp.ret_count) for join_attr in join_attrs: + # Note: Each iteration here represents a potential N+1 query + # The trade-off is between query performance and result accuracy (will_filter_by_joined_attr, joined_resp) = _get_joined_resp(resp.ret_values, join_attr) # This is needed to set result as blank value blank_joining_info = { @@ -465,8 +481,8 @@ def _get_ref_id_from_es_result(attrinfo): } # this inserts result to previous search result - new_ret_values = [] - joined_ret_values = [] + new_ret_values: list[AdvancedSearchResultRecord] = [] + joined_ret_values: list[AdvancedSearchResultRecord] = [] for resp_result in resp.ret_values: # joining search result to original one ref_info = resp_result.attrs.get(join_attr.name)