dmm-com · userlocalhost · Feb 3, 2025 · Feb 1, 2025
diff --git a/docs/content/advanced/advanced_search.md b/docs/content/advanced/advanced_search.md
@@ -31,17 +31,48 @@ Advanced Search is a powerful feature that allows you to search across multiple
 
 ### Advanced Features
 
-- **Search Chain**
-  - Follow relationships between entries
-  - Search through referenced objects
-  - Chain multiple searches to traverse complex relationships
-  - Results include both direct matches and related entries
-
-- **Export Functionality**
-  - Export search results to various formats
-  - Asynchronous processing for large result sets
-  - Progress tracking for export tasks
-  - Download exported files when ready
+#### Join Attrs
+
+Join Attrs enables relationship traversal in search results. Key points:
+
+- **Implementation**
+  - Sequential processing: root -> join targets
+  - Each join triggers a new Elasticsearch query
+  - Supports OBJECT and ARRAY type references
+
+- **Critical Considerations**
+  1. **Pagination Behavior**
+     ```python
+     # Example: Request 100 items
+     root_results = search(limit=100)      # Returns 100 root items
+     joined_results = join_and_filter()    # May return 0-100 items
+     next_page_starts_at = 101            # Regardless of joined result size
+     ```
+     - Pagination applies to root level only
+     - Join/filter operations may reduce result size
+     - Each page may return fewer items than requested
+
+  2. **Performance Impact**
+     - N+1 query pattern with multiple joins
+     - No optimization for deep joins with filters
+
+  3. **Result Count Accuracy**
+     - Total count represents root level matches only
+     - Actual result count may be lower after joins/filters
+     - Cannot predict exact total after joins without full scan
+
+#### Search Chain
+- Follow relationships between entries
+- Search through referenced objects
+- Chain multiple searches to traverse complex relationships
+- Results include both direct matches and related entries
+
+#### Export Functionality
+
+- Export search results to various formats
+- Asynchronous processing for large result sets
+- Progress tracking for export tasks
+- Download exported files when ready
 
 ## Access Methods
 
@@ -87,7 +118,6 @@ Access Advanced Search programmatically through REST endpoints:
 - Leverage search chains for complex relationship queries
 - Monitor export task progress for large result sets
 - Consider pagination for large result sets in API usage
-
 ## For Developers
 
 ### Architecture Overview
@@ -173,3 +203,4 @@ Access Advanced Search programmatically through REST endpoints:
 - Integration tests for API endpoints
 - Performance tests for search operations
 - ACL verification tests
+
diff --git a/entry/api_v2/views.py b/entry/api_v2/views.py
@@ -240,6 +240,11 @@ class AdvancedSearchAPI(generics.GenericAPIView):
     """
     NOTE for now it's just copied from /api/v1/entry/search, but it should be
     rewritten with DRF components.
+
+    Join Attrs implementation notes:
+    - Pagination is applied at the root level first; joins & filters run afterwards
+    - This may result in fewer items than the requested limit
+    - Each join triggers a new ES query (N+1 pattern)
     """
 
     @extend_schema(
@@ -275,8 +280,18 @@ def _get_joined_resp(
             prev_results: list[AdvancedSearchResultRecord], join_attr: AdvancedSearchJoinAttrInfo
         ) -> tuple[bool, AdvancedSearchResults]:
             """
-            This is a helper method for join_attrs that will get specified attr values
-            that prev_result's ones refer to.
+            Process join operation for a single attribute.
+
+            Flow:
+            1. Get related entities from prev_results
+            2. Extract referral IDs and names
+            3. Execute new ES query for joined entities
+            4. Apply filters if specified
+
+            Note:
+            - Each call triggers a new ES query
+            - Results may be reduced by join filters
+            - Pagination from root level may lead to incomplete results
             """
             entities = Entity.objects.filter(
                 id__in=[result.entity["id"] for result in prev_results]
@@ -364,21 +379,20 @@ def _get_joined_resp(
 
         # === End of Function: _get_joined_resp() ===
 
-        def _get_ref_id_from_es_result(attrinfo):
-            if attrinfo["type"] == AttrType.OBJECT:
-                if attrinfo.get("value") is not None:
+        def _get_ref_id_from_es_result(attrinfo) -> list[int | None]:
+            match attrinfo["type"]:
+                case AttrType.OBJECT if attrinfo.get("value") is not None:
                     return [attrinfo["value"].get("id")]
 
-            if attrinfo["type"] == AttrType.NAMED_OBJECT:
-                if attrinfo.get("value") is not None:
+                case AttrType.NAMED_OBJECT if attrinfo.get("value") is not None:
                     [ref_info] = attrinfo["value"].values()
                     return [ref_info.get("id")]
 
-            if attrinfo["type"] == AttrType.ARRAY_OBJECT:
-                return [x.get("id") for x in attrinfo["value"]]
+                case AttrType.ARRAY_OBJECT:
+                    return [x.get("id") for x in attrinfo["value"]]
 
-            if attrinfo["type"] == AttrType.ARRAY_NAMED_OBJECT:
-                return sum([[y["id"] for y in x.values()] for x in attrinfo["value"]], [])
+                case AttrType.ARRAY_NAMED_OBJECT:
+                    return sum([[y["id"] for y in x.values()] for x in attrinfo["value"]], [])
 
             return []
 
@@ -443,6 +457,8 @@ def _get_ref_id_from_es_result(attrinfo):
         total_count = deepcopy(resp.ret_count)
 
         for join_attr in join_attrs:
+            # Note: Each iteration here represents a potential N+1 query
+            # The trade-off is between query performance and result accuracy
             (will_filter_by_joined_attr, joined_resp) = _get_joined_resp(resp.ret_values, join_attr)
             # This is needed to set result as blank value
             blank_joining_info = {
@@ -465,8 +481,8 @@ def _get_ref_id_from_es_result(attrinfo):
             }
 
             # this inserts result to previous search result
-            new_ret_values = []
-            joined_ret_values = []
+            new_ret_values: list[AdvancedSearchResultRecord] = []
+            joined_ret_values: list[AdvancedSearchResultRecord] = []
             for resp_result in resp.ret_values:
                 # joining search result to original one
                 ref_info = resp_result.attrs.get(join_attr.name)