From 225524ab7690ba8934584b79252330583605a520 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 15 Oct 2018 15:11:38 +0200 Subject: [PATCH] Fix handling of empty keyword in terms aggregation Empty values on keyword fields are filtered by the `map` execution mode of the `terms` aggregation. This commit restores them as valid buckets. Closes #34434 --- .../bucket/terms/StringTermsAggregator.java | 2 +- .../bucket/terms/TermsAggregatorTests.java | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java index 5bd8a8cd1d09d..6458b0066dabe 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java @@ -93,7 +93,7 @@ public void collect(int doc, long bucket) throws IOException { if (includeExclude != null && !includeExclude.accept(bytes)) { continue; } - if (previous.get().equals(bytes)) { + if (i > 0 && previous.get().equals(bytes)) { continue; } long bucketOrdinal = bucketOrds.add(bytes); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 819d39cb62bdf..1619989f38bca 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -158,6 +158,7 @@ public void testSimple() throws Exception { document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); indexWriter.addDocument(document); document = new Document(); + document.add(new SortedSetDocValuesField("string", new BytesRef(""))); document.add(new SortedSetDocValuesField("string", new BytesRef("c"))); document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); indexWriter.addDocument(document); @@ -165,6 +166,9 @@ public void testSimple() throws Exception { document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); document.add(new SortedSetDocValuesField("string", new BytesRef("d"))); indexWriter.addDocument(document); + document = new Document(); + document.add(new SortedSetDocValuesField("string", new BytesRef(""))); + indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { @@ -181,15 +185,17 @@ public void testSimple() throws Exception { indexSearcher.search(new MatchAllDocsQuery(), aggregator); aggregator.postCollection(); Terms result = (Terms) aggregator.buildAggregation(0L); - assertEquals(4, result.getBuckets().size()); - assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(5, result.getBuckets().size()); + assertEquals("", result.getBuckets().get(0).getKeyAsString()); assertEquals(2L, result.getBuckets().get(0).getDocCount()); - assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + assertEquals("a", result.getBuckets().get(1).getKeyAsString()); assertEquals(2L, result.getBuckets().get(1).getDocCount()); - assertEquals("c", result.getBuckets().get(2).getKeyAsString()); - assertEquals(1L, result.getBuckets().get(2).getDocCount()); - assertEquals("d", result.getBuckets().get(3).getKeyAsString()); + assertEquals("b", result.getBuckets().get(2).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(2).getDocCount()); + assertEquals("c", result.getBuckets().get(3).getKeyAsString()); assertEquals(1L, result.getBuckets().get(3).getDocCount()); + assertEquals("d", result.getBuckets().get(4).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(4).getDocCount()); } } }