From bfd548bffa623fe8bb2ca76afdfddf0dd6d1ab04 Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 11 Sep 2024 14:14:37 +0930 Subject: [PATCH 1/4] feat: index collections when created or updated --- openedx/core/djangoapps/content/search/api.py | 16 +++ .../djangoapps/content/search/handlers.py | 26 +++- .../core/djangoapps/content/search/tasks.py | 13 ++ .../content/search/tests/test_api.py | 129 +++++++++++++----- 4 files changed, 147 insertions(+), 37 deletions(-) diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index d601abde8e6d..24f6a012478a 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -553,6 +553,22 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None: _update_index_docs(docs) +def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None: + """ + Creates or updates the document for the given Library Collection in the search index + """ + content_library = lib_api.ContentLibrary.objects.get_by_key(library_key) + collection = authoring_api.get_collection( + learning_package_id=content_library.learning_package_id, + collection_key=collection_key, + ) + docs = [ + searchable_doc_for_collection(collection) + ] + + _update_index_docs(docs) + + def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None: """ Creates or updates the documents for the given Content Library in the search index diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index 0c2054ad39a8..47583baad091 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -6,20 +6,23 @@ from django.db.models.signals import post_delete from django.dispatch import receiver +from opaque_keys import InvalidKeyError +from opaque_keys.edx.keys import UsageKey from openedx_events.content_authoring.data import ( ContentLibraryData, ContentObjectChangedData, LibraryBlockData, + LibraryCollectionData, XBlockData, ) -from opaque_keys import InvalidKeyError -from opaque_keys.edx.keys import UsageKey from openedx_events.content_authoring.signals import ( CONTENT_LIBRARY_DELETED, CONTENT_LIBRARY_UPDATED, LIBRARY_BLOCK_CREATED, LIBRARY_BLOCK_DELETED, LIBRARY_BLOCK_UPDATED, + LIBRARY_COLLECTION_CREATED, + LIBRARY_COLLECTION_UPDATED, XBLOCK_CREATED, XBLOCK_DELETED, XBLOCK_UPDATED, @@ -34,6 +37,7 @@ delete_library_block_index_doc, delete_xblock_index_doc, update_content_library_index_docs, + update_library_collection_index_doc, upsert_library_block_index_doc, upsert_xblock_index_doc, ) @@ -151,6 +155,24 @@ def content_library_updated_handler(**kwargs) -> None: update_content_library_index_docs.apply(args=[str(content_library_data.library_key)]) +@receiver(LIBRARY_COLLECTION_CREATED) +@receiver(LIBRARY_COLLECTION_UPDATED) +@only_if_meilisearch_enabled +def library_collection_updated_handler(**kwargs) -> None: + """ + Create or update the index for the content library collection + """ + library_collection = kwargs.get("library_collection", None) + if not library_collection or not isinstance(library_collection, LibraryCollectionData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + update_library_collection_index_doc.delay( + str(library_collection.library_key), + library_collection.collection_key, + ) + + @receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED) @only_if_meilisearch_enabled def content_object_associations_changed_handler(**kwargs) -> None: diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py index dfd603776981..d9dad834db29 100644 --- a/openedx/core/djangoapps/content/search/tasks.py +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -84,3 +84,16 @@ def update_content_library_index_docs(library_key_str: str) -> None: # Delete all documents in this library that were not published by above function # as this task is also triggered on discard event. api.delete_all_draft_docs_for_library(library_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None: + """ + Celery task to update the content index documents for a library collection + """ + library_key = LibraryLocatorV2.from_string(library_key_str) + + log.info("Updating content index documents for collection %s in library%s", collection_key, library_key) + + api.upsert_library_collection_index_doc(library_key, collection_key) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index f8f13d8b9a6f..d075681798bc 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -186,16 +186,16 @@ def setUp(self): description="my collection description" ) self.collection_dict = { - 'id': 'MYCOL', - 'type': 'collection', - 'display_name': 'my_collection', - 'description': 'my collection description', - 'context_key': 'lib:org1:lib', - 'org': 'org1', - 'created': created_date.timestamp(), - 'modified': created_date.timestamp(), + "id": "MYCOL", + "type": "collection", + "display_name": "my_collection", + "description": "my collection description", + "context_key": "lib:org1:lib", + "org": "org1", + "created": created_date.timestamp(), + "modified": created_date.timestamp(), "access_id": lib_access.id, - 'breadcrumbs': [{'display_name': 'Library'}] + "breadcrumbs": [{"display_name": "Library"}], } @override_settings(MEILISEARCH_ENABLED=False) @@ -409,38 +409,93 @@ def test_index_library_block_tags(self, mock_meilisearch): ) @override_settings(MEILISEARCH_ENABLED=True) - def test_index_library_block_collections(self, mock_meilisearch): + def test_index_library_block_and_collections(self, mock_meilisearch): """ - Test indexing an Library Block that is in two collections. + Test indexing an Library Block and the Collections it's in. """ - collection1 = authoring_api.create_collection( - learning_package_id=self.library.learning_package.id, - key="COL1", - title="Collection 1", - created_by=None, - description="First Collection", - ) - - collection2 = authoring_api.create_collection( - learning_package_id=self.library.learning_package.id, - key="COL2", - title="Collection 2", - created_by=None, - description="Second Collection", - ) + # Create collections (these internally call `upsert_library_collection_index_doc`) + created_date = datetime(2023, 5, 6, 7, 8, 9, tzinfo=timezone.utc) + with freeze_time(created_date): + collection1 = library_api.create_library_collection( + self.library.key, + collection_key="COL1", + title="Collection 1", + created_by=None, + description="First Collection", + ) - # Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs`) - # (adding in reverse order to test sorting of collection tag)) - for collection in (collection2, collection1): - library_api.update_library_collection_components( + collection2 = library_api.create_library_collection( self.library.key, - collection_key=collection.key, - usage_keys=[ - self.problem1.usage_key, - ], + collection_key="COL2", + title="Collection 2", + created_by=None, + description="Second Collection", ) - # Build expected docs with collections at each stage + # Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs` and + # `upsert_library_collection_index_doc`) + # (adding in reverse order to test sorting of collection tag) + updated_date = datetime(2023, 6, 7, 8, 9, 10, tzinfo=timezone.utc) + with freeze_time(updated_date): + for collection in (collection2, collection1): + library_api.update_library_collection_components( + self.library.key, + collection_key=collection.key, + usage_keys=[ + self.problem1.usage_key, + ], + ) + + # Build expected docs at each stage + lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key) + doc_collection1_created = { + "id": "COL1", + "type": "collection", + "display_name": "Collection 1", + "description": "First Collection", + "context_key": "lib:org1:lib", + "org": "org1", + "created": created_date.timestamp(), + "modified": created_date.timestamp(), + "access_id": lib_access.id, + "breadcrumbs": [{"display_name": "Library"}], + } + doc_collection2_created = { + "id": "COL2", + "type": "collection", + "display_name": "Collection 2", + "description": "Second Collection", + "context_key": "lib:org1:lib", + "org": "org1", + "created": created_date.timestamp(), + "modified": created_date.timestamp(), + "access_id": lib_access.id, + "breadcrumbs": [{"display_name": "Library"}], + } + doc_collection2_updated = { + "id": "COL2", + "type": "collection", + "display_name": "Collection 2", + "description": "Second Collection", + "context_key": "lib:org1:lib", + "org": "org1", + "created": created_date.timestamp(), + "modified": updated_date.timestamp(), + "access_id": lib_access.id, + "breadcrumbs": [{"display_name": "Library"}], + } + doc_collection1_updated = { + "id": "COL1", + "type": "collection", + "display_name": "Collection 1", + "description": "First Collection", + "context_key": "lib:org1:lib", + "org": "org1", + "created": created_date.timestamp(), + "modified": updated_date.timestamp(), + "access_id": lib_access.id, + "breadcrumbs": [{"display_name": "Library"}], + } doc_problem_with_collection2 = { "id": self.doc_problem1["id"], "collections": [collection2.key], @@ -452,6 +507,10 @@ def test_index_library_block_collections(self, mock_meilisearch): mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( [ + call([doc_collection1_created]), + call([doc_collection2_created]), + call([doc_collection2_updated]), + call([doc_collection1_updated]), call([doc_problem_with_collection2]), call([doc_problem_with_collection1]), ], From 0037530d32408d9a417665285b061fdd83439afc Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 11 Sep 2024 14:15:31 +0930 Subject: [PATCH 2/4] feat: adds num_children to collection index --- openedx/core/djangoapps/content/search/documents.py | 5 +++++ openedx/core/djangoapps/content/search/tests/test_api.py | 5 +++++ .../core/djangoapps/content/search/tests/test_documents.py | 2 ++ 3 files changed, 12 insertions(+) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 51eca8816430..e45c9fb3f634 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -61,6 +61,10 @@ class Fields: # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on. content = "content" + # Collections use this field to communicate how many entities/components they contain. + # Structural XBlocks may use this one day to indicate how many child blocks they ocntain. + num_children = "num_children" + # Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword # search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio' # command (changing those settings on an large active index is not recommended). @@ -344,6 +348,7 @@ def searchable_doc_for_collection(collection) -> dict: # If related contentlibrary is found, it will override this value below. # Mostly contentlibrary.library_key == learning_package.key Fields.context_key: collection.learning_package.key, + Fields.num_children: collection.entities.count(), } # Just in case learning_package is not related to a library try: diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index d075681798bc..ec9975724753 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -190,6 +190,7 @@ def setUp(self): "type": "collection", "display_name": "my_collection", "description": "my collection description", + "num_children": 0, "context_key": "lib:org1:lib", "org": "org1", "created": created_date.timestamp(), @@ -453,6 +454,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): "type": "collection", "display_name": "Collection 1", "description": "First Collection", + "num_children": 0, "context_key": "lib:org1:lib", "org": "org1", "created": created_date.timestamp(), @@ -465,6 +467,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): "type": "collection", "display_name": "Collection 2", "description": "Second Collection", + "num_children": 0, "context_key": "lib:org1:lib", "org": "org1", "created": created_date.timestamp(), @@ -477,6 +480,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): "type": "collection", "display_name": "Collection 2", "description": "Second Collection", + "num_children": 1, "context_key": "lib:org1:lib", "org": "org1", "created": created_date.timestamp(), @@ -489,6 +493,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): "type": "collection", "display_name": "Collection 1", "description": "First Collection", + "num_children": 1, "context_key": "lib:org1:lib", "org": "org1", "created": created_date.timestamp(), diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index d972c9ac4956..90ac9c1a70bb 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -299,6 +299,7 @@ def test_collection_with_library(self): "org": "edX", "display_name": "Toy Collection", "description": "my toy collection description", + "num_children": 1, "context_key": "lib:edX:2012_Fall", "access_id": self.library_access_id, "breadcrumbs": [{"display_name": "some content_library"}], @@ -327,6 +328,7 @@ def test_collection_with_no_library(self): "type": "collection", "display_name": "my_collection", "description": "my collection description", + "num_children": 0, "context_key": learning_package.key, "access_id": self.toy_course_access_id, "breadcrumbs": [{"display_name": "some learning_package"}], From 5bdcc9eeabaac38fba151a943a614cc5b01ce104 Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Thu, 12 Sep 2024 14:08:53 +0930 Subject: [PATCH 3/4] fix: store collection.key in the document block_id field --- openedx/core/djangoapps/content/search/documents.py | 9 +++++++-- openedx/core/djangoapps/content/search/tests/test_api.py | 7 ++++++- .../djangoapps/content/search/tests/test_documents.py | 2 ++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index ab73123d7572..6a6c920a71db 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -28,7 +28,11 @@ class Fields: id = "id" usage_key = "usage_key" type = "type" # DocType.course_block or DocType.library_block (see below) - block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID + # The block_id part of the usage key for course or library blocks. + # If it's a collection, the collection.key is stored here. + # Sometimes human-readable, sometimes a random hex ID + # Is only unique within the given context_key. + block_id = "block_id" display_name = "display_name" description = "description" modified = "modified" @@ -54,7 +58,7 @@ class Fields: tags_level1 = "level1" tags_level2 = "level2" tags_level3 = "level3" - # List of collection.key strings this object belongs to. + # List of collection.block_id strings this object belongs to. collections = "collections" # The "content" field is a dictionary of arbitrary data, depending on the block_type. # It comes from each XBlock's index_dictionary() method (if present) plus some processing. @@ -339,6 +343,7 @@ def searchable_doc_for_collection(collection) -> dict: """ doc = { Fields.id: collection.id, + Fields.block_id: collection.key, Fields.type: DocType.collection, Fields.display_name: collection.title, Fields.description: collection.description, diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index e4a004635103..3105ad8c007f 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -186,7 +186,8 @@ def setUp(self): description="my collection description" ) self.collection_dict = { - 'id': self.collection.id, + "id": self.collection.id, + "block_id": self.collection.key, "type": "collection", "display_name": "my_collection", "description": "my collection description", @@ -451,6 +452,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key) doc_collection1_created = { "id": collection1.id, + "block_id": collection1.key, "type": "collection", "display_name": "Collection 1", "description": "First Collection", @@ -464,6 +466,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): } doc_collection2_created = { "id": collection2.id, + "block_id": collection2.key, "type": "collection", "display_name": "Collection 2", "description": "Second Collection", @@ -477,6 +480,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): } doc_collection2_updated = { "id": collection2.id, + "block_id": collection2.key, "type": "collection", "display_name": "Collection 2", "description": "Second Collection", @@ -490,6 +494,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): } doc_collection1_updated = { "id": collection1.id, + "block_id": collection1.key, "type": "collection", "display_name": "Collection 1", "description": "First Collection", diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 990ec4db9dcb..47a89b2068a8 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -295,6 +295,7 @@ def test_collection_with_library(self): doc = searchable_doc_for_collection(self.collection) assert doc == { "id": self.collection.id, + "block_id": self.collection.key, "type": "collection", "org": "edX", "display_name": "Toy Collection", @@ -325,6 +326,7 @@ def test_collection_with_no_library(self): doc = searchable_doc_for_collection(collection) assert doc == { "id": collection.id, + "block_id": collection.key, "type": "collection", "display_name": "my_collection", "description": "my collection description", From 2c6c8cf9d98ff28a60c2f928115544e6906e296b Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Thu, 12 Sep 2024 15:39:13 +0930 Subject: [PATCH 4/4] fix: apply library collection changes synchronously to update the index before the frontend re-fetches --- openedx/core/djangoapps/content/search/handlers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index b41c0e5d7851..6a341c92ed2b 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -167,10 +167,13 @@ def library_collection_updated_handler(**kwargs) -> None: log.error("Received null or incorrect data for event") return - update_library_collection_index_doc.delay( + # Update collection index synchronously to make sure that search index is updated before + # the frontend invalidates/refetches index. + # See content_library_updated_handler for more details. + update_library_collection_index_doc.apply(args=[ str(library_collection.library_key), library_collection.collection_key, - ) + ]) @receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)