Skip to content

Commit

Permalink
Merge pull request #7192 from cfpb/ccdb5-api-opensearch
Browse files Browse the repository at this point in the history
django-opensearch-dsl Implementation
  • Loading branch information
Colin-Seifer authored Aug 26, 2022
2 parents 24178dd + 29e66a8 commit e6d7eb2
Show file tree
Hide file tree
Showing 39 changed files with 360 additions and 141 deletions.
9 changes: 5 additions & 4 deletions cfgov/ask_cfpb/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from ask_cfpb.models.answer_page import AnswerPage
from search.elasticsearch_helpers import (
Expand All @@ -19,8 +19,8 @@ class AnswerPageDocument(Document):
url = fields.TextField()
preview = fields.TextField(attr="answer_content_preview")

def get_queryset(self):
query_set = super().get_queryset()
def get_queryset(self, *args, **kwargs):
query_set = super().get_queryset(*args, **kwargs)
return query_set.filter(live=True, redirect_to_page=None)

def prepare_autocomplete(self, instance):
Expand All @@ -41,6 +41,7 @@ def prepare_url(self, instance):
class Index:
name = environment_specific_index("ask-cfpb")
settings = {"number_of_shards": 1, "number_of_replicas": 0}
auto_refresh = False

class Django:
model = AnswerPage
Expand Down
2 changes: 1 addition & 1 deletion cfgov/ask_cfpb/models/search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from elasticsearch.exceptions import RequestError
from opensearchpy.exceptions import RequestError

from ask_cfpb.documents import AnswerPageDocument
from search.models import AUTOCOMPLETE_MAX_CHARS
Expand Down
23 changes: 12 additions & 11 deletions cfgov/ask_cfpb/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

from django.apps import apps
from django.db import models
from django.test import TestCase
from django.test import TestCase, override_settings

from wagtail.core.models import Site

from django_elasticsearch_dsl import fields
from django_elasticsearch_dsl.documents import DocType
from django_elasticsearch_dsl.exceptions import ModelFieldNotMappedError
from django_opensearch_dsl import fields
from django_opensearch_dsl.documents import Document
from django_opensearch_dsl.exceptions import ModelFieldNotMappedError
from model_bakery import baker

from ask_cfpb.documents import AnswerPageDocument
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_ignore_signal_default(self):
self.assertFalse(AnswerPageDocument.django.ignore_signals)

def test_auto_refresh_default(self):
self.assertFalse(AnswerPageDocument.django.auto_refresh)
self.assertFalse(AnswerPageDocument.Index.auto_refresh)

def test_fields_populated(self):
mapping = AnswerPageDocument._doc_type.mapping
Expand All @@ -121,7 +121,7 @@ def test_fields_populated(self):
)

def test_to_field(self):
doc = DocType()
doc = Document()
for f in ["question", "statement"]:
nameField = doc.to_field(f, AnswerPage._meta.get_field(f))
self.assertIsInstance(nameField, fields.TextField)
Expand All @@ -142,7 +142,7 @@ def test_to_field(self):
self.assertEqual(intField._path, ["featured_rank"])

def test_to_field_with_unknown_field(self):
doc = DocType()
doc = Document()
with self.assertRaises(ModelFieldNotMappedError):
doc.to_field(
"answer_base", AnswerPage._meta.get_field("answer_base")
Expand Down Expand Up @@ -213,10 +213,11 @@ def test_prepare_es(self):
},
)

@override_settings(OPENSEARCH_DSL_AUTO_REFRESH=True)
def test_model_instance_update_no_refresh(self):
self.es_parent_page.add_child(instance=self.es_page)
self.es_page.save_revision().publish()
self.doc.django.auto_refresh = False
with patch("django_elasticsearch_dsl.documents.bulk") as mock:
self.doc.update(self.es_page)
self.assertNotIn("refresh", mock.call_args_list[0][1])
self.doc.Index.auto_refresh = False
with patch("django_opensearch_dsl.documents.bulk") as mock:
self.doc.update(self.es_page, "update")
self.assertFalse(mock.call_args_list[0][1]["refresh"])
15 changes: 11 additions & 4 deletions cfgov/ask_cfpb/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from django.test import TestCase

from elasticsearch.exceptions import RequestError
from opensearchpy.exceptions import RequestError

from ask_cfpb.documents import AnswerPageDocument
from ask_cfpb.forms import AutocompleteForm
from ask_cfpb.models.answer_page import AnswerPage
from ask_cfpb.models.search import (
Expand Down Expand Up @@ -57,7 +58,9 @@ def test_AnswerPageSearch_autocomplete(self):
live=True,
)
self.ROOT_PAGE.add_child(instance=test_answer_page)
self.rebuild_elasticsearch_index("ask_cfpb", stdout=StringIO())
self.rebuild_elasticsearch_index(
AnswerPageDocument.Index.name, stdout=StringIO()
)
search_term = "mone"
test_answer_page_search = AnswerPageSearch(search_term=search_term)
self.assertEqual(
Expand All @@ -82,7 +85,9 @@ def test_AnswerPage_search(self):
live=True,
)
self.ROOT_PAGE.add_child(instance=test_answer_page)
self.rebuild_elasticsearch_index("ask_cfpb", stdout=StringIO())
self.rebuild_elasticsearch_index(
AnswerPageDocument.Index.name, stdout=StringIO()
)
search_term = "What is money?"
test_answer_page_search = AnswerPageSearch(search_term=search_term)
test_answer_page_search_results = test_answer_page_search.search()[
Expand All @@ -99,7 +104,9 @@ def test_AnswerPage_suggest(self):
live=True,
)
self.ROOT_PAGE.add_child(instance=test_answer_page)
self.rebuild_elasticsearch_index("ask_cfpb", stdout=StringIO())
self.rebuild_elasticsearch_index(
AnswerPageDocument.Index.name, stdout=StringIO()
)
search_term = "monye"
test_answer_page_search = AnswerPageSearch(search_term=search_term)
test_answer_page_search_results = test_answer_page_search.suggest()
Expand Down
26 changes: 18 additions & 8 deletions cfgov/cfgov/settings/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import json
import os
import secrets
from pathlib import Path

from django.conf import global_settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.translation import gettext_lazy as _

from elasticsearch import RequestsHttpConnection
from opensearchpy import RequestsHttpConnection
from requests_aws4auth import AWS4Auth

from cfgov.util import admin_emails
Expand Down Expand Up @@ -99,7 +98,7 @@
"form_explainer.apps.FormExplainerConfig",
"teachers_digital_platform",
"wagtailmedia",
"django_elasticsearch_dsl",
"django_opensearch_dsl",
"corsheaders",
"login",
"filing_instruction_guide",
Expand Down Expand Up @@ -327,10 +326,12 @@
)

# ElasticSearch 7 Configuration
TESTING = False
ES_SCHEMA = os.getenv("ES_SCHEMA", "http")
ES_HOST = os.getenv("ES_HOST", "localhost")
ES_PORT = os.getenv("ES_PORT", "9200")
ELASTICSEARCH_BIGINT = 50000
ELASTICSEARCH_DEFAULT_ANALYZER = "snowball"
OPENSEARCH_BIGINT = 50000
OPENSEARCH_DEFAULT_ANALYZER = "snowball"

if os.environ.get("USE_AWS_ES", False):
awsauth = AWS4Auth(
Expand All @@ -339,7 +340,7 @@
"us-east-1",
"es",
)
ELASTICSEARCH_DSL = {
OPENSEARCH_DSL = {
"default": {
"hosts": [{"host": ES_HOST, "port": 443}],
"http_auth": awsauth,
Expand All @@ -349,9 +350,18 @@
},
}
else:
ELASTICSEARCH_DSL = {"default": {"hosts": f"http://{ES_HOST}:{ES_PORT}"}}
OPENSEARCH_DSL = {
"default": {
"hosts": f"{ES_SCHEMA}://{ES_HOST}:{ES_PORT}",
"http_auth": (
os.getenv("ES_USER", "admin"),
os.getenv("ES_PASS", "admin"),
),
"verify_certs": False,
}
}

ELASTICSEARCH_DSL_SIGNAL_PROCESSOR = (
OPENSEARCH_DSL_SIGNAL_PROCESSOR = (
"search.elasticsearch_helpers.WagtailSignalProcessor"
)

Expand Down
4 changes: 2 additions & 2 deletions cfgov/cfgov/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
# test runner cleans up this directory after the tests run.
MEDIA_ROOT = os.path.join(PROJECT_ROOT, "cfgov", "tests", "test-media")

ELASTICSEARCH_DSL_AUTO_REFRESH = False
ELASTICSEARCH_DSL_AUTOSYNC = False
OPENSEARCH_DSL_AUTO_REFRESH = False
OPENSEARCH_DSL_AUTOSYNC = False

if os.getenv("SKIP_DJANGO_MIGRATIONS"):

Expand Down
8 changes: 4 additions & 4 deletions cfgov/paying_for_college/documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.urls import reverse

from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from paying_for_college.models import School
from search.elasticsearch_helpers import (
Expand All @@ -18,9 +18,9 @@ class SchoolDocument(Document):
url = fields.TextField()
nicknames = fields.TextField()

def get_queryset(self):
def get_queryset(self, *args, **kwargs):
"""Prevent schools that have closed from being indexed."""
query_set = super().get_queryset()
query_set = super().get_queryset(*args, **kwargs)
return query_set.filter(operating=True)

def prepare_autocomplete(self, instance):
Expand Down
4 changes: 2 additions & 2 deletions cfgov/regulations3k/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from regulations3k.models import SectionParagraph
from search.elasticsearch_helpers import environment_specific_index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.core.management import call_command
from django.core.management.base import BaseCommand

from regulations3k.documents import SectionParagraphDocument
from regulations3k.models import Part, Section


Expand All @@ -12,7 +13,19 @@
def _run_elasticsearch_rebuild():
"""Rebuild the Elasticsearch index after prepping section paragraphs."""
call_command(
"search_index", "--rebuild", "-f", "--models", "regulations3k"
"opensearch",
"index",
"--force",
"rebuild",
SectionParagraphDocument.Index.name,
)
call_command(
"opensearch",
"document",
"--force",
"--refresh",
"-i",
SectionParagraphDocument.Index.name,
)


Expand Down
6 changes: 5 additions & 1 deletion cfgov/regulations3k/tests/test_search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ def test_index_management_command(self, mock_elasticsearch):
)
def test_run_elasticsearch_rebuild(self, mock_call):
update_regulation_index._run_elasticsearch_rebuild()
self.assertEqual(mock_call.call_count, 1)
# django-opensearch-dsl splits the command to create indices and index
# documents into 2 separate commands. For this reason, we expect
# call_command to be invoked twice. This was done in a single command
# with django-elasticsearch-dsl.
self.assertEqual(mock_call.call_count, 2)
30 changes: 21 additions & 9 deletions cfgov/search/elasticsearch_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
pre_page_move,
)

from django_elasticsearch_dsl.signals import BaseSignalProcessor
from elasticsearch_dsl import analyzer, token_filter, tokenizer
from django_opensearch_dsl.signals import BaseSignalProcessor
from opensearch_dsl import analyzer, token_filter, tokenizer

from search.models import Synonym

Expand Down Expand Up @@ -130,14 +130,14 @@ def setUpClass(cls):
# See https://github.com/django-es/django-elasticsearch-dsl/pull/323
# for a proposed pull request to django-elasticsearch-dsl to support
# this blocking behavior.
from elasticsearch.helpers import bulk as original_bulk
from opensearchpy.helpers import bulk as original_bulk

def bulk_with_refresh(*args, **kwargs):
kwargs.setdefault("refresh", True)
return original_bulk(*args, **kwargs)

cls.patched_es_bulk = patch(
"django_elasticsearch_dsl.documents.bulk", new=bulk_with_refresh
"django_opensearch_dsl.documents.bulk", new=bulk_with_refresh
)
cls.patched_es_bulk.start()

Expand All @@ -150,18 +150,30 @@ def tearDownClass(cls):
cls.patched_es_bulk.stop()

@staticmethod
def rebuild_elasticsearch_index(*models, stdout=sys.stdout):
def rebuild_elasticsearch_index(*indices, stdout=sys.stdout):
"""Rebuild an Elasticsearch index, waiting for its completion.
This method is an alias for the built-in search_index Django management
command provided by django-elasticsearch-dsl.
command provided by django-opensearch-dsl.
"""
call_command(
"search_index",
action="rebuild",
"opensearch",
"index",
"rebuild",
*indices,
force=True,
stdout=stdout,
)
if indices:
indices = ("-i", *indices)
call_command(
"opensearch",
"document",
"index",
"--refresh",
*indices,
force=True,
models=models,
stdout=stdout,
)

Expand Down
2 changes: 1 addition & 1 deletion cfgov/search/management/commands/es_health.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from django.core.management.base import BaseCommand, CommandError

from elasticsearch_dsl import connections
from opensearch_dsl import connections


class Command(BaseCommand):
Expand Down
2 changes: 1 addition & 1 deletion cfgov/search/tests/management/commands/test_es_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_bad_connection_name(self):
with self.assertRaises(CommandError):
call_command("es_health", "notarealconnection", stdout=StringIO())

@mock.patch("elasticsearch_dsl.connections.get_connection")
@mock.patch("opensearch_dsl.connections.get_connection")
def test_foo(self, mock_es_get_connection):
mock_elasticsearch = mock.MagicMock()
mock_elasticsearch.cat.health.return_value = "health table"
Expand Down
10 changes: 6 additions & 4 deletions cfgov/teachers_digital_platform/documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from django_opensearch_dsl import Document, fields
from django_opensearch_dsl.registries import registry

from search.elasticsearch_helpers import environment_specific_index, strip_html
from teachers_digital_platform.models.pages import ActivityPage
Expand Down Expand Up @@ -55,10 +55,12 @@ class Django:
model = ActivityPage
fields = ["id"]

def get_queryset(self):
def get_queryset(self, *args, **kwargs):
"""Prevent non-live pages from being indexed."""
return (
super(ActivityPageDocument, self).get_queryset().filter(live=True)
super(ActivityPageDocument, self)
.get_queryset(*args, **kwargs)
.filter(live=True)
)

def prepare_activity_duration(self, instance):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)
from wagtail.core.fields import StreamField

from elasticsearch_dsl import Q
from opensearch_dsl import Q

from teachers_digital_platform.documents import ActivityPageDocument
from teachers_digital_platform.models.django import (
Expand Down
Loading

0 comments on commit e6d7eb2

Please sign in to comment.