Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store Collection metadata + component count in meilisearch #684

Merged
merged 6 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,22 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None:
_update_index_docs(docs)


def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None:
"""
Creates or updates the document for the given Library Collection in the search index
"""
content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
collection = authoring_api.get_collection(
learning_package_id=content_library.learning_package_id,
collection_key=collection_key,
)
docs = [
searchable_doc_for_collection(collection)
]

_update_index_docs(docs)


def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None:
"""
Creates or updates the documents for the given Content Library in the search index
Expand Down
12 changes: 11 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ class Fields:
id = "id"
usage_key = "usage_key"
type = "type" # DocType.course_block or DocType.library_block (see below)
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
# The block_id part of the usage key for course or library blocks.
# If it's a collection, the collection.key is stored here.
# Sometimes human-readable, sometimes a random hex ID
# Is only unique within the given context_key.
block_id = "block_id"
display_name = "display_name"
description = "description"
modified = "modified"
Expand Down Expand Up @@ -65,6 +69,10 @@ class Fields:
# Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
content = "content"

# Collections use this field to communicate how many entities/components they contain.
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
Expand Down Expand Up @@ -355,6 +363,7 @@ def searchable_doc_for_collection(collection) -> dict:
"""
doc = {
Fields.id: collection.id,
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Fields.description: collection.description,
Expand All @@ -364,6 +373,7 @@ def searchable_doc_for_collection(collection) -> dict:
# If related contentlibrary is found, it will override this value below.
# Mostly contentlibrary.library_key == learning_package.key
Fields.context_key: collection.learning_package.key,
Fields.num_children: collection.entities.count(),
}
# Just in case learning_package is not related to a library
try:
Expand Down
29 changes: 27 additions & 2 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@

from django.db.models.signals import post_delete
from django.dispatch import receiver
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
LibraryBlockData,
LibraryCollectionData,
XBlockData,
)
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_DELETED,
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
LIBRARY_BLOCK_UPDATED,
LIBRARY_COLLECTION_CREATED,
LIBRARY_COLLECTION_UPDATED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
Expand All @@ -34,6 +37,7 @@
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
update_library_collection_index_doc,
upsert_library_block_index_doc,
upsert_xblock_index_doc,
)
Expand Down Expand Up @@ -151,6 +155,27 @@ def content_library_updated_handler(**kwargs) -> None:
update_content_library_index_docs.apply(args=[str(content_library_data.library_key)])


@receiver(LIBRARY_COLLECTION_CREATED)
@receiver(LIBRARY_COLLECTION_UPDATED)
@only_if_meilisearch_enabled
def library_collection_updated_handler(**kwargs) -> None:
"""
Create or update the index for the content library collection
"""
library_collection = kwargs.get("library_collection", None)
if not library_collection or not isinstance(library_collection, LibraryCollectionData): # pragma: no cover
log.error("Received null or incorrect data for event")
return

# Update collection index synchronously to make sure that search index is updated before
# the frontend invalidates/refetches index.
# See content_library_updated_handler for more details.
update_library_collection_index_doc.apply(args=[
str(library_collection.library_key),
library_collection.collection_key,
])


@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
@only_if_meilisearch_enabled
def content_object_associations_changed_handler(**kwargs) -> None:
Expand Down
13 changes: 13 additions & 0 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,16 @@ def update_content_library_index_docs(library_key_str: str) -> None:
# Delete all documents in this library that were not published by above function
# as this task is also triggered on discard event.
api.delete_all_draft_docs_for_library(library_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None:
"""
Celery task to update the content index documents for a library collection
"""
library_key = LibraryLocatorV2.from_string(library_key_str)

log.info("Updating content index documents for collection %s in library%s", collection_key, library_key)

api.upsert_library_collection_index_doc(library_key, collection_key)
rpenido marked this conversation as resolved.
Show resolved Hide resolved
141 changes: 105 additions & 36 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,18 @@ def setUp(self):
description="my collection description"
)
self.collection_dict = {
'id': self.collection.id,
'type': 'collection',
'display_name': 'my_collection',
'description': 'my collection description',
'context_key': 'lib:org1:lib',
'org': 'org1',
'created': created_date.timestamp(),
'modified': created_date.timestamp(),
"id": self.collection.id,
"block_id": self.collection.key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
'breadcrumbs': [{'display_name': 'Library'}]
"breadcrumbs": [{"display_name": "Library"}],
}

@override_settings(MEILISEARCH_ENABLED=False)
Expand Down Expand Up @@ -415,38 +417,101 @@ def test_index_library_block_tags(self, mock_meilisearch):
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_index_library_block_collections(self, mock_meilisearch):
def test_index_library_block_and_collections(self, mock_meilisearch):
"""
Test indexing an Library Block that is in two collections.
Test indexing an Library Block and the Collections it's in.
"""
collection1 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL1",
title="Collection 1",
created_by=None,
description="First Collection",
)

collection2 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL2",
title="Collection 2",
created_by=None,
description="Second Collection",
)
# Create collections (these internally call `upsert_library_collection_index_doc`)
created_date = datetime(2023, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(created_date):
collection1 = library_api.create_library_collection(
self.library.key,
collection_key="COL1",
title="Collection 1",
created_by=None,
description="First Collection",
)

# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs`)
# (adding in reverse order to test sorting of collection tag))
for collection in (collection2, collection1):
library_api.update_library_collection_components(
collection2 = library_api.create_library_collection(
self.library.key,
collection_key=collection.key,
usage_keys=[
self.problem1.usage_key,
],
collection_key="COL2",
title="Collection 2",
created_by=None,
description="Second Collection",
)

# Build expected docs with collections at each stage
# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs` and
# `upsert_library_collection_index_doc`)
# (adding in reverse order to test sorting of collection tag)
updated_date = datetime(2023, 6, 7, 8, 9, 10, tzinfo=timezone.utc)
with freeze_time(updated_date):
for collection in (collection2, collection1):
library_api.update_library_collection_components(
self.library.key,
collection_key=collection.key,
usage_keys=[
self.problem1.usage_key,
],
)

# Build expected docs at each stage
lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key)
doc_collection1_created = {
"id": collection1.id,
"block_id": collection1.key,
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_created = {
"id": collection2.id,
"block_id": collection2.key,
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_updated = {
"id": collection2.id,
"block_id": collection2.key,
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
"num_children": 1,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": updated_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection1_updated = {
"id": collection1.id,
"block_id": collection1.key,
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
"num_children": 1,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": updated_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_problem_with_collection1 = {
"id": self.doc_problem1["id"],
"collections": {
Expand All @@ -462,9 +527,13 @@ def test_index_library_block_collections(self, mock_meilisearch):
},
}

assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 6
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_collection1_created]),
call([doc_collection2_created]),
call([doc_collection2_updated]),
call([doc_collection1_updated]),
call([doc_problem_with_collection1]),
call([doc_problem_with_collection2]),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,12 @@ def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.collection)
assert doc == {
"id": self.collection.id,
"block_id": self.collection.key,
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
"description": "my toy collection description",
"num_children": 1,
"context_key": "lib:edX:2012_Fall",
"access_id": self.library_access_id,
"breadcrumbs": [{"display_name": "some content_library"}],
Expand All @@ -327,9 +329,11 @@ def test_collection_with_no_library(self):
doc = searchable_doc_for_collection(collection)
assert doc == {
"id": collection.id,
"block_id": collection.key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
"num_children": 0,
"context_key": learning_package.key,
"access_id": self.toy_course_access_id,
"breadcrumbs": [{"display_name": "some learning_package"}],
Expand Down
Loading