diff --git a/client/charts/services/ChartConfig.js b/client/charts/services/ChartConfig.js index 83eeb0f02..a47a3c037 100644 --- a/client/charts/services/ChartConfig.js +++ b/client/charts/services/ChartConfig.js @@ -377,7 +377,7 @@ export function ChartConfig( } else { const child = this.getChild(); - chart.legendTitle = this.getSourceName(child.field); + chart.legendTitle = chart.getTranslationTitle(child.field); chart.tooltipHeader = '{series.name}/{point.x}: {point.y}'; chart.colourByPoint = false; diff --git a/client/content_publishing_report/controllers/ContentPublishingReportController.js b/client/content_publishing_report/controllers/ContentPublishingReportController.js index 7a832393c..f921f602b 100644 --- a/client/content_publishing_report/controllers/ContentPublishingReportController.js +++ b/client/content_publishing_report/controllers/ContentPublishingReportController.js @@ -188,8 +188,8 @@ export function ContentPublishingReportController( (item) => !new Set($scope.report_groups.map((item) => item.name)).has(item.name))]; $scope.subgroup_by = _.filter( - $scope.report_groups, - (group) => group.qcode !== $scope.currentParams.params.aggs.group.field + $scope.group_by, + (group) => JSON.stringify(group.qcode) !== $scope.currentParams.params.aggs.group.field ); if (_.get($scope, 'currentParams.params.aggs.subgroup.field.length', 0) < 1) { @@ -361,7 +361,11 @@ export const generateTitle = (chart, params) => { const parentName = chart.getSourceName(parentField); if (_.get(params, 'aggs.subgroup.field.length', 0) > 0) { - const childField = _.get(params, 'aggs.subgroup.field'); + let childField = _.get(params, 'aggs.subgroup.field'); + + if (childField.startsWith('{"scheme')) { + childField = getCustomVocabFieldName(childField); + } const childName = chart.getSourceName(childField); return gettext( diff --git a/server/analytics/content_publishing_report/content_publishing_report.py b/server/analytics/content_publishing_report/content_publishing_report.py 
index 864f6172d..386a497e9 100644 --- a/server/analytics/content_publishing_report/content_publishing_report.py +++ b/server/analytics/content_publishing_report/content_publishing_report.py @@ -12,6 +12,7 @@ from analytics.chart_config import ChartConfig from analytics.common import MAX_TERMS_SIZE import json +from flask import current_app as app class ContentPublishingReportResource(BaseReportResource): @@ -94,6 +95,14 @@ def generate_report(self, docs, args): for child in ( (parent.get("child") or {}).get("buckets") or (parent.get("child_aggs", {}).get("child", {}).get("buckets")) + or ( + parent.get("child_aggs", {}) + .get("child_qcode_filter", {}) + .get("qcode_filter", {}) + .get("qcode_terms", {}) + .get("buckets") + ) + or (parent.get("child", {}).get("qcode_filter", {}).get("qcode_terms", {}).get("buckets")) or [] ): child_key = child.get("key") @@ -167,26 +176,58 @@ def get_aggregation_buckets(self, docs, aggregation_ids=None): return super().get_aggregation_buckets(docs, aggregation_ids) def get_custom_aggs_query(self, query, aggs): - field = self.parse_field_param(aggs.get("parent", {}).get("terms", {}).get("field")) - if field: - # Construct the nested aggregation query - qcode_terms_agg = {"terms": {"field": "subject.qcode", "size": 1000}} - - # Retrieve child aggregations if any - child = self.get_child_aggs(query) - if child: + terms_aggs_size = app.config.get("TERMS_AGGREGATION_SIZE", 1000) + # retrieve parent field + parent_field = self.parse_field_param(aggs.get("parent", {}).get("terms", {}).get("field")) + + # retrieve child field + child = self.get_child_aggs(query) + child_field = self.parse_field_param(child.get("child", {}).get("terms", {}).get("field")) if child else None + + # helper function + def construct_nested_query(field): + return { + "nested": {"path": "subject"}, + "aggs": { + "qcode_filter": { + "filter": {"term": {"subject.scheme": field}}, + "aggs": {"qcode_terms": {"terms": {"field": "subject.qcode", "size": 
terms_aggs_size}}}, + } + }, + } + + # if parent field is a scheme field + if parent_field: + # Construct the terms aggregation for qcode + qcode_terms_agg = {"terms": {"field": "subject.qcode", "size": terms_aggs_size}} + + if child_field: + # Create child aggregation structure + qcode_terms_agg["aggs"] = { + "child_aggs": { + "reverse_nested": {}, + "aggs": {"child_qcode_filter": construct_nested_query(child_field)}, + } + } + else: + # If child_field is not a scheme field, include the predefined child aggregation qcode_terms_agg["aggs"] = {"child_aggs": {"reverse_nested": {}, "aggs": child}} + # Construct the combined parent-child aggregation query["aggs"]["parent"] = { "nested": {"path": "subject"}, "aggs": { "qcode_filter": { - "filter": {"term": {"subject.scheme": field}}, + "filter": {"term": {"subject.scheme": parent_field}}, "aggs": {"qcode_terms": qcode_terms_agg}, } }, } + # if parent field is not a scheme field but child field is a scheme field + if not parent_field and child_field: + query["aggs"]["parent"]["aggs"]["child"] = construct_nested_query(child_field) + def get_child_aggs(self, query): parent_aggs = query.get("aggs", {}).get("parent", {}) if "aggs" in parent_aggs: diff --git a/server/analytics/content_publishing_report/content_publishing_report_test.py b/server/analytics/content_publishing_report/content_publishing_report_test.py new file mode 100644 index 000000000..906754f2f --- /dev/null +++ b/server/analytics/content_publishing_report/content_publishing_report_test.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8; -*- +# +# This file is part of Superdesk. +# +# Copyright 2024 Sourcefabric z.u. and contributors. 
+# +# For the full copyright and license information, please see the +# AUTHORS and LICENSE files distributed with this source code, or +# at https://www.sourcefabric.org/superdesk/license + +from superdesk import get_resource_service +from superdesk.metadata.item import ITEM_STATE, CONTENT_STATE +from superdesk.tests import TestCase + +from analytics import init_app + + +class ContentPublishingReportTestCase(TestCase): + def setUp(self): + self.maxDiff = None + + with self.app.app_context(): + init_app(self.app) + self.service = get_resource_service("content_publishing_report") + self.app.data.insert( + "published", + [ + { + "_id": "item1", + "task": {"desk": "de1", "stage": "st1"}, + ITEM_STATE: CONTENT_STATE.PUBLISHED, + "anpa_category": [{"qcode": "advisory", "name": "Advisories"}], + "source": "AAP", + }, + { + "_id": "item2", + "task": {"desk": "de1", "stage": "st1"}, + ITEM_STATE: CONTENT_STATE.PUBLISHED, + "anpa_category": [{"qcode": "domestic_sport", "name": "Domestic Sport"}], + "subject": [ + {"name": "BTL/ECO", "qcode": "btl", "scheme": "packages"}, + {"name": "Canadian", "qcode": "candian", "scheme": "keywords"}, + ], + "source": "AAP", + }, + { + "_id": "item3", + "task": {"desk": "de1", "stage": "st1"}, + ITEM_STATE: CONTENT_STATE.PUBLISHED, + "anpa_category": [{"qcode": "finance", "name": "Finance"}], + "subject": [{"name": "Aap", "qcode": "aap", "scheme": "keywords"}], + "source": "AAP", + }, + { + "_id": "item4", + "task": {"desk": "de1", "stage": "st1"}, + ITEM_STATE: CONTENT_STATE.PUBLISHED, + "subject": [ + {"name": "Aap", "qcode": "aap", "scheme": "keywords"}, + {"name": "BIN/ALG", "qcode": "bin", "scheme": "packages"}, + ], + "source": "AAP", + }, + ], + ) + + def test_get_aggregation_buckets(self): + with self.app.app_context(): + params = { + "source": { + "query": { + "filtered": { + "filter": { + "bool": { + "must": [], + "must_not": [], + } + } + } + }, + "size": 0, + "from": 0, + "sort": [{"versioncreated": "desc"}], + }, + "page": 1, 
+ "max_results": 0, + "aggs": { + "group": {"field": "anpa_category.qcode", "size": 0}, + "subgroup": {"field": '{"scheme":"keywords"}', "size": 0}, + }, + } + args = { + "aggs": { + "group": {"field": "anpa_category.qcode", "size": 0}, + "subgroup": {"field": '{"scheme":"keywords"}', "size": 0}, + }, + "params": { + "dates": {}, + "must": {}, + "must_not": {}, + "min": 1, + "chart": {"type": "column", "sort_order": "desc"}, + }, + "return_type": "aggregations", + } + + docs = self.service.run_query(params, args) + report = self.service.generate_report(docs, args) + assert { + "groups": {"advisory": {}, "domestic_sport": {"candian": 1}, "finance": {"aap": 1}}, + "subgroups": {"candian": 1, "aap": 1}, + } == report + + params["aggs"] = { + "group": {"field": '{"scheme":"packages"}', "size": 0}, + "subgroup": {"field": '{"scheme":"keywords"}', "size": 0}, + } + args["aggs"] = { + "group": {"field": '{"scheme":"packages"}', "size": 0}, + "subgroup": {"field": '{"scheme":"keywords"}', "size": 0}, + } + + docs = self.service.run_query(params, args) + report = self.service.generate_report(docs, args) + assert { + "groups": {"bin": {"aap": 1}, "btl": {"candian": 1}}, + "subgroups": {"aap": 1, "candian": 1}, + } == report + + params["aggs"] = { + "group": {"field": '{"scheme":"keywords"}', "size": 0}, + "subgroup": {"field": "anpa_category.qcode", "size": 0}, + } + args["aggs"] = { + "group": {"field": '{"scheme":"packages"}', "size": 0}, + "subgroup": {"field": "anpa_category.qcode", "size": 0}, + } + + docs = self.service.run_query(params, args) + report = self.service.generate_report(docs, args) + assert { + "groups": {"aap": {"finance": 1}, "candian": {"domestic_sport": 1}}, + "subgroups": {"finance": 1, "domestic_sport": 1}, + } == report diff --git a/server/settings.py b/server/settings.py index 578d89a3a..8115c6001 100644 --- a/server/settings.py +++ b/server/settings.py @@ -203,3 +203,5 @@ def env(variable, fallback_value=None): # Archive Statistics 
STATISTICS_MONGO_DBNAME = "sptests" ANALYTICS_ENABLE_ARCHIVE_STATS = strtobool(env("ANALYTICS_ENABLE_ARCHIVE_STATS", "true")) + +TERMS_AGGREGATION_SIZE = 1000