From 36488dad418adcbe29b9a642dfb40297aac3b2a8 Mon Sep 17 00:00:00 2001
From: David Okeke <140066072+david-okeke1337@users.noreply.github.com>
Date: Tue, 17 Dec 2024 15:44:35 +0000
Subject: [PATCH] INTR-447 - Export search as CSV (#842)

Co-authored-by: Cameron Lamb
---
Reviewer notes (text between "---" and the first "diff --git" is not applied):
- create_groups.py: adds a "Search Exporters" group whose permissions are the
  "export_search" codename filtered to the "extended_search" app, and calls
  the new method at the end of handle().
- content/models.py: the split_query import moves from module level into
  filter_by_query(). search/utils.py gains a module-level import of
  content.models in this patch, so this presumably avoids a circular import
  (TODO confirm).
- core/forms.py: "is_superuser" is only deleted when the field is present.
- extended_search: the model Meta and migration 0003 both add the
  "export_search" permission alongside "view_explore".
- NOTE(review): line breaks, indentation and blank lines below were
  reconstructed from a whitespace-collapsed copy of this patch; verify
  against the commit before applying.

 .../management/commands/create_groups.py      |  18 +++
 src/content/models.py                         |   4 +-
 src/core/forms.py                             |   3 +-
 .../migrations/0003_alter_setting_options.py  |  22 ++++
 src/extended_search/models.py                 |   5 +-
 .../search/partials/search_results.html       |   5 +
 .../partials/search_results_category.html     |   2 +-
 src/search/urls.py                            |   3 +-
 src/search/utils.py                           | 123 +++++++++++++++++-
 src/search/views.py                           |  53 +++++++-
 10 files changed, 229 insertions(+), 9 deletions(-)
 create mode 100644 src/extended_search/migrations/0003_alter_setting_options.py

diff --git a/src/content/management/commands/create_groups.py b/src/content/management/commands/create_groups.py
index 6636883af..ede66ee96 100644
--- a/src/content/management/commands/create_groups.py
+++ b/src/content/management/commands/create_groups.py
@@ -156,6 +156,11 @@
     "can_change_home_page_content",
 ]
 
+SEARCH_EXPORTERS_GROUP_NAME = "Search Exporters"
+SEARCH_EXPORTERS_PERMISSIONS = [
+    "export_search",
+]
+
 
 class Command(BaseCommand):
     help = "Create page permissions"
@@ -245,6 +250,18 @@ def event_permissions(self):
             EVENT_EDITORS_PAGE_PERMISSIONS,
         )
 
+    def search_exporters_permissions(self):
+        search_exporters_group, _ = Group.objects.get_or_create(
+            name=SEARCH_EXPORTERS_GROUP_NAME
+        )
+
+        search_exporters_group.permissions.set(
+            Permission.objects.filter(
+                codename__in=SEARCH_EXPORTERS_PERMISSIONS,
+                content_type__app_label="extended_search",
+            )
+        )
+
     def handle(self, *args, **options):
         news_moderators, _ = Group.objects.get_or_create(
             name="News Moderators",
@@ -354,3 +371,4 @@ def handle(self, *args, **options):
 
         self.home_page_permissions()
         self.event_permissions()
+        self.search_exporters_permissions()
diff --git a/src/content/models.py b/src/content/models.py
index eeb6ad5fc..6027fb1a3 100644
--- a/src/content/models.py
+++ b/src/content/models.py
@@ -35,7 +35,6 @@
 from extended_search.index import DWIndexedField as IndexedField
 from extended_search.index import Indexed, RelatedFields
 from peoplefinder.widgets import PersonChooser
-from search.utils import split_query
 from user.models import User as UserModel
 
 
@@ -527,8 +526,9 @@ class SearchKeywordOrPhrase(models.Model):
 
 
 class SearchKeywordOrPhraseQuerySet(models.QuerySet):
     def filter_by_query(self, query):
-        query_parts = split_query(query)
+        from search.utils import split_query
 
+        query_parts = split_query(query)
         return self.filter(search_keyword_or_phrase__keyword_or_phrase__in=query_parts)
 
diff --git a/src/core/forms.py b/src/core/forms.py
index 19d06f7c9..c35c30a6d 100644
--- a/src/core/forms.py
+++ b/src/core/forms.py
@@ -30,4 +30,5 @@ class PageProblemFoundForm(forms.Form):
 class WagtailUserEditForm(UserEditForm):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        del self.fields["is_superuser"]
+        if "is_superuser" in self.fields:
+            del self.fields["is_superuser"]
diff --git a/src/extended_search/migrations/0003_alter_setting_options.py b/src/extended_search/migrations/0003_alter_setting_options.py
new file mode 100644
index 000000000..f7a715e5a
--- /dev/null
+++ b/src/extended_search/migrations/0003_alter_setting_options.py
@@ -0,0 +1,22 @@
+# Generated by Django 4.2.16 on 2024-11-25 15:24
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("extended_search", "0002_alter_setting_options_alter_setting_key"),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name="setting",
+            options={
+                "permissions": (
+                    ("view_explore", "View the global search explore page"),
+                    ("export_search", "Export the search result as csv"),
+                )
+            },
+        ),
+    ]
diff --git a/src/extended_search/models.py b/src/extended_search/models.py
index 56cf23f39..38d05d05e 100644
--- a/src/extended_search/models.py
+++ b/src/extended_search/models.py
@@ -20,7 +20,10 @@ class Setting(models.Model):
     )
 
     class Meta:
-        permissions = (("view_explore", "View the global search explore page"),)
+        permissions = (
+            ("view_explore", "View the global search explore page"),
+            ("export_search", "Export the search result as csv"),
+        )
 
     def __str__(self):
         return self.key
diff --git a/src/search/templates/search/partials/search_results.html b/src/search/templates/search/partials/search_results.html
index 86155a837..6d69a0d06 100644
--- a/src/search/templates/search/partials/search_results.html
+++ b/src/search/templates/search/partials/search_results.html
@@ -18,6 +18,11 @@

Start typing to search

{% endif %}
+{% if perms.extended_search.export_search %}
+    Download search results
+{% endif %}
+
 {% endif %}
diff --git a/src/search/urls.py b/src/search/urls.py
index 0801547d4..623f80c0f 100644
--- a/src/search/urls.py
+++ b/src/search/urls.py
@@ -1,7 +1,7 @@
 from django.urls import path
 from django.views.generic import RedirectView
 
-from .views import autocomplete, explore, search
+from .views import autocomplete, explore, export_search, search
 
 
 app_name = "search"
@@ -22,5 +22,6 @@
     path("explore/", explore, name="explore"),
     path("autocomplete/", autocomplete, name="autocomplete"),
     path("<str:category>/", search, name="category"),
+    path("<str:category>/export_search/", export_search, name="export_search"),
     path("", search, name="home"),
 ]
diff --git a/src/search/utils.py b/src/search/utils.py
index be60c9f7e..2d53f2347 100644
--- a/src/search/utils.py
+++ b/src/search/utils.py
@@ -1,14 +1,20 @@
 import re
 import unicodedata
 from typing import Optional
 
 from django.conf import settings
+from django.db import models
+from django.http import HttpRequest
+from django.urls import reverse
 from wagtail.search.query import Fuzzy, Or, Phrase, PlainText
 
+from content.models import BasePage
 from extended_search import settings as search_settings
 from extended_search.index import Indexed
 from extended_search.query import Nested, OnlyFields
 from extended_search.query_builder import CustomQueryBuilder
+from news.models import NewsPage
+from peoplefinder.models import Person, Team
 
 
 def sanitize_search_query(query: Optional[str] = None) -> str:
@@ -236,3 +242,136 @@ def has_only_bad_results(query, category, pinned_results, search_results):
     bad_score_threshold = get_bad_score_threshold(query, category)
     highest_score = search_results[0]._score
     return highest_score <= bad_score_threshold
+
+
+#
+# EXPORT UTILS
+#
+
+
+def get_content_owner(page) -> dict:
+    """Return the name and email of the page's content owner.
+
+    Both values are empty strings when the page has no truthy
+    ``content_owner`` attribute.
+    """
+    page_content_owner = getattr(page, "content_owner", None)
+    return {
+        "name": page_content_owner.full_name if page_content_owner else "",
+        "email": page_content_owner.email if page_content_owner else "",
+    }
+
+
+def get_content_author(page) -> dict:
+    """Return the name and email to report as the page's author.
+
+    Precedence: the Perm Sec override (name only), then the first publisher
+    of a news page, then the user of the latest revision. Values that cannot
+    be determined are left as empty strings.
+    """
+    content_author = {
+        "name": "",
+        "email": "",
+    }
+    if getattr(page, "perm_sec_as_author", False):
+        content_author["name"] = settings.PERM_SEC_NAME
+        return content_author
+
+    if isinstance(page, NewsPage) and hasattr(page, "get_first_publisher"):
+        first_publisher = page.get_first_publisher()
+        # Guard against a missing publisher: fall through to the latest
+        # revision instead of raising AttributeError on None.
+        if first_publisher:
+            content_author["name"] = first_publisher.get_full_name()
+            content_author["email"] = first_publisher.email
+            return content_author
+
+    # A page without revisions has no revision user to report.
+    latest_revision = page.get_latest_revision()
+    latest_revision_user = latest_revision.user if latest_revision else None
+    if latest_revision_user:
+        content_author["name"] = latest_revision_user.get_full_name()
+        content_author["email"] = latest_revision_user.email
+    return content_author
+
+
+def get_page_export_row(page_result: "BasePage", request: HttpRequest) -> list:
+    """Build the CSV row for a page result, in the BasePage header order."""
+    content_owner = get_content_owner(page_result)
+    content_author = get_content_author(page_result)
+    return [
+        page_result.title,
+        request.build_absolute_uri(page_result.get_url()),
+        request.build_absolute_uri(
+            reverse("wagtailadmin_pages:edit", args=[page_result.id])
+        ),
+        content_owner["name"],
+        content_owner["email"],
+        content_author["name"],
+        content_author["email"],
+        page_result.first_published_at,
+        page_result.last_published_at,
+        type(page_result).__name__,
+    ]
+
+
+def get_person_export_row(person_result: "Person", request: HttpRequest) -> list:
+    """Build the CSV row for a person result, in the Person header order."""
+    # One "Job Title: Team Name" entry per role. A dict keyed by job title
+    # would silently drop roles that share a title across teams.
+    roles = "; ".join(
+        f"{role.job_title}: {role.team.name}" for role in person_result.roles.all()
+    )
+    return [
+        person_result.first_name,
+        person_result.last_name,
+        person_result.email,
+        person_result.primary_phone_number,
+        request.build_absolute_uri(person_result.get_absolute_url()),
+        roles,
+    ]
+
+
+def get_team_export_row(team_result: "Team", request: HttpRequest) -> list:
+    """Build the CSV row for a team result, in the Team header order."""
+    return [
+        team_result.name,
+        request.build_absolute_uri(team_result.get_absolute_url()),
+        request.build_absolute_uri(reverse("team-edit", args=[team_result.slug])),
+    ]
+
+
+# Checked in insertion order against the search model; the first entry whose
+# key the model derives from supplies the CSV header and row builder.
+SEARCH_EXPORT_MAPPINGS: dict[type[models.Model], dict] = {
+    BasePage: {
+        "header": [
+            "Title",
+            "URL",
+            "Edit URL",
+            "Content Owner Name",
+            "Content Owner Email",
+            "Content Author Name",
+            "Content Author Email",
+            "First Published",
+            "Last Updated",
+            "Page Type",
+        ],
+        "item_to_row_function": get_page_export_row,
+    },
+    Person: {
+        "header": [
+            "First Name",
+            "Last Name",
+            "Email",
+            "Phone",
+            "Profile URL",
+            "Roles (Job Title: Team Name)",
+        ],
+        "item_to_row_function": get_person_export_row,
+    },
+    Team: {
+        "header": ["Title", "URL", "Edit URL"],
+        "item_to_row_function": get_team_export_row,
+    },
+}
diff --git a/src/search/views.py b/src/search/views.py
index 1017fe7fd..c073494e8 100644
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -1,3 +1,4 @@
+import csv
 import logging
 
 import sentry_sdk
@@ -14,7 +15,6 @@
 from extended_search.settings import settings_singleton
 from peoplefinder.models import Person, Team
 from search.templatetags import search as search_template_tag
-from search.utils import get_query_info_for_model
 
 
 logger = logging.getLogger(__name__)
@@ -24,6 +24,10 @@ def can_view_explore():
     return user_passes_test(lambda u: u.has_perm("extended_search.view_explore"))
 
 
+def can_export_search():
+    return user_passes_test(lambda u: u.has_perm("extended_search.export_search"))
+
+
 @require_http_methods(["GET"])
 def autocomplete(request: HttpRequest) -> HttpResponse:
     _category = "autocomplete"
@@ -58,7 +62,7 @@ def autocomplete(request: HttpRequest) -> HttpResponse:
 
 
 @require_http_methods(["GET"])
-def search(request: HttpRequest, category: str = None) -> HttpResponse:
+def search(request: HttpRequest, category: str | None = None) -> HttpResponse:
     query = request.GET.get("query", "")
     page = request.GET.get("page", "1")
     tab_override = request.GET.get("tab_override", False)
@@ -97,6 +101,8 @@ def explore(request: HttpRequest) -> HttpResponse:
     """
     Administrative view for exploring search options, boosts, etc
     """
+    from search.utils import get_query_info_for_model
+
     if request.method == "POST":
         if not request.user.has_perm("extended_search.change_setting"):
             messages.error(request, "You are not authorised to edit settings")
@@ -132,3 +138,59 @@ def explore(request: HttpRequest) -> HttpResponse:
     }
 
     return TemplateResponse(request, "search/explore.html", context=context)
+
+
+@require_http_methods(["GET"])
+@can_export_search()
+def export_search(request: HttpRequest, category: str) -> HttpResponse:
+    """
+    Administrative view for exporting search results as csv
+
+    Responds with a CSV attachment whose header and rows come from the
+    SEARCH_EXPORT_MAPPINGS entry matching the category's search model.
+    Raises Http404 for a category that has no search vector and TypeError
+    when the search model has no export mapping.
+    """
+    from django.http import Http404
+
+    from search.utils import SEARCH_EXPORT_MAPPINGS
+
+    query = request.GET.get("query", "")
+    vector_key = "all_pages" if category == "all" else category
+    try:
+        vector_class = search_template_tag.SEARCH_VECTORS[vector_key]
+    except KeyError as err:
+        # The category comes straight from the URL; an unknown one is a bad
+        # link rather than a server error.
+        raise Http404(f"Unknown search category '{category}'") from err
+    search_vector = vector_class(request)
+
+    search_results = search_vector.search(query)
+    search_model = search_vector.model
+
+    # Use the first mapping whose model the search model derives from.
+    export_mapping = next(
+        (
+            mapping
+            for model, mapping in SEARCH_EXPORT_MAPPINGS.items()
+            if issubclass(search_model, model)
+        ),
+        None,
+    )
+    if not export_mapping:
+        raise TypeError(
+            f"'{search_model}' is not a model that is configured for export"
+        )
+
+    filename = f"search_export_{category}.csv"
+    response = HttpResponse(
+        content_type="text/csv",
+        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+    )
+
+    writer = csv.writer(response)
+    writer.writerow(export_mapping["header"])
+    item_to_row = export_mapping["item_to_row_function"]
+    writer.writerows(item_to_row(result, request) for result in search_results)
+
+    return response