From 79a89790e3e88d9e1f7f6b6287412c71ca2412db Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 Aug 2022 10:18:36 -0400 Subject: [PATCH] Synthetic source: load text from stored fields (#87480) Adds support for loading `text` and `keyword` fields that have `store: true`. We could likely load *any* stored fields, but I wanted to blaze the trail using something fairly useful. --- .../mapper/extras/ScaledFloatFieldMapper.java | 3 +- .../test/60_synthetic_source.yml | 2 +- .../test/get/100_synthetic_source.yml | 76 ++++++ .../test/mget/90_synthetic_source.yml | 79 +++++- .../test/search/400_synthetic_source.yml | 117 +++++++++ .../test/update/100_synthetic_source.yml | 60 +++++ .../index/get/ShardGetService.java | 30 ++- .../index/mapper/BooleanFieldMapper.java | 2 +- .../index/mapper/DateFieldMapper.java | 2 +- .../index/mapper/GeoPointFieldMapper.java | 2 +- .../index/mapper/IpFieldMapper.java | 2 +- .../index/mapper/KeywordFieldMapper.java | 193 +------------- .../index/mapper/NumberFieldMapper.java | 148 +---------- .../index/mapper/ObjectMapper.java | 100 +++++--- ...dNumericDocValuesSyntheticFieldLoader.java | 202 +++++++++++++++ ...ortedSetDocValuesSyntheticFieldLoader.java | 237 ++++++++++++++++++ .../index/mapper/SourceLoader.java | 117 +++++++-- .../mapper/StringStoredFieldFieldLoader.java | 64 +++++ .../index/mapper/TextFieldMapper.java | 13 +- .../search/fetch/FetchPhase.java | 17 +- .../index/mapper/KeywordFieldMapperTests.java | 25 +- .../index/mapper/SourceLoaderTests.java | 4 +- .../index/mapper/TextFieldMapperTests.java | 20 +- .../index/mapper/MapperServiceTestCase.java | 31 ++- .../index/mapper/MapperTestCase.java | 12 +- .../AggregateDoubleMetricFieldMapper.java | 68 +++-- .../mapper/ConstantKeywordFieldMapper.java | 22 +- 27 files changed, 1162 insertions(+), 486 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java create mode 100644 
server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesSyntheticFieldLoader.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/StringStoredFieldFieldLoader.java diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java index 84c51fe0ab6c1..06bac2ec8b1e1 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.SimpleMappedFieldType; +import org.elasticsearch.index.mapper.SortedNumericDocValuesSyntheticFieldLoader; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; @@ -705,7 +706,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) { + return new SortedNumericDocValuesSyntheticFieldLoader(name(), simpleName()) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(decodeForSyntheticSource(value, scalingFactor)); diff --git a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/60_synthetic_source.yml b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/60_synthetic_source.yml index 5a86fa2074675..55d39940081bc 100644 --- a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/60_synthetic_source.yml +++ 
b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/60_synthetic_source.yml @@ -10,7 +10,7 @@ unsupported: body: mappings: _source: - synthetic: true + mode: synthetic properties: join_field: type: join diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index e87a727de94bd..5bcde58ea36d3 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -167,3 +167,79 @@ force_synthetic_source_bad_mapping: index: test id: 1 force_synthetic_source: true + +--- +stored text: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + text: + type: text + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + text: the quick brown fox + + - do: + get: + index: test + id: 1 + - match: {_index: "test"} + - match: {_id: "1"} + - match: {_version: 1} + - match: {found: true} + - match: + _source: + text: the quick brown fox + +--- +stored keyword: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + kwd: + type: keyword + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + kwd: the quick brown fox + + - do: + get: + index: test + id: 1 + - match: {_index: "test"} + - match: {_id: "1"} + - match: {_version: 1} + - match: {found: true} + - match: + _source: + kwd: the quick brown fox diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index e7cde7fa1a7cf..327aa2d0fa4d2 100644 
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,55 @@ keyword: docs.1._source: kwd: bar +--- +stored text: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + text: + type: text + store: true + + - do: + index: + index: test + id: 1 + body: + text: the quick brown fox + + - do: + index: + index: test + id: 2 + body: + text: jumped over the lazy dog + + - do: + mget: + index: test + body: + ids: [1, 2] + - match: {docs.0._index: "test"} + - match: {docs.0._id: "1"} + - match: + docs.0._source: + text: the quick brown fox + + - match: {docs.1._index: "test"} + - match: {docs.1._id: "2"} + - match: + docs.1._source: + text: jumped over the lazy dog + --- force_synthetic_source_ok: - skip: @@ -60,22 +109,25 @@ force_synthetic_source_ok: _source: mode: stored properties: - kwd: - type: keyword + obj: + properties: + kwd: + type: keyword - do: index: index: test id: 1 body: - kwd: foo + obj.kwd: foo - do: index: index: test id: 2 body: - kwd: bar + obj: + kwd: bar # When _source is used in the fetch the original _source is perfect - do: @@ -85,10 +137,11 @@ force_synthetic_source_ok: ids: [1, 2] - match: docs.0._source: - kwd: foo + obj.kwd: foo - match: docs.1._source: - kwd: bar + obj: + kwd: bar # When we force synthetic source dots in field names get turned into objects - do: @@ -99,16 +152,18 @@ force_synthetic_source_ok: ids: [ 1, 2 ] - match: docs.0._source: - kwd: foo + obj: + kwd: foo - match: docs.1._source: - kwd: bar + obj: + kwd: bar --- force_synthetic_source_bad_mapping: - skip: - version: " - 8.3.99" - reason: introduced in 8.4.0 + version: " - 8.4.99" + reason: message changed in 8.5 - do: indices.create: @@ -157,5 +212,5 @@ force_synthetic_source_bad_mapping: force_synthetic_source: 
true body: ids: [ 1, 2 ] - - match: {docs.0.error.reason: "field [text] of type [text] doesn't support synthetic source unless it has a sub-field of type [keyword] with doc values enabled and without ignore_above or a normalizer"} - - match: {docs.1.error.reason: "field [text] of type [text] doesn't support synthetic source unless it has a sub-field of type [keyword] with doc values enabled and without ignore_above or a normalizer"} + - match: {docs.0.error.reason: "field [text] of type [text] doesn't support synthetic source unless it is stored or has a sub-field of type [keyword] with doc values or stored and without ignore_above or a normalizer"} + - match: {docs.1.error.reason: "field [text] of type [text] doesn't support synthetic source unless it is stored or has a sub-field of type [keyword] with doc values or stored and without ignore_above or a normalizer"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/400_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/400_synthetic_source.yml index b95fc62d24ffd..55351969dbdcd 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/400_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/400_synthetic_source.yml @@ -33,6 +33,123 @@ keyword: hits.hits.0._source: kwd: foo +--- +stored text: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + text: + type: text + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + text: the quick brown fox + + - do: + search: + index: test + body: + query: + ids: + values: [1] + - match: + hits.hits.0._source: + text: the quick brown fox + +--- +stored keyword: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: 
synthetic + properties: + kwd: + type: keyword + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + kwd: the quick brown fox + + - do: + search: + index: test + body: + query: + ids: + values: [1] + - match: + hits.hits.0._source: + kwd: the quick brown fox + +--- +stored keyword without sibling fields: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + kwd: + type: keyword + store: true + + - do: + bulk: + refresh: true + index: test + body: + - '{"index": {}}' + - '{"kwd": "the quick brown fox", "s": 1, "n": 1}' + - '{"index": {}}' + - '{"kwd": "jumped over the lazy dog", "s": 2}' + + - do: + search: + index: test + body: + sort: s + - match: + hits.hits.0._source: + kwd: the quick brown fox + s: 1 + n: 1 + - match: + hits.hits.1._source: + kwd: jumped over the lazy dog + s: 2 + --- force_synthetic_source_ok: - skip: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/update/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/update/100_synthetic_source.yml index 6c8e32374884d..2ad71d3b6ed55 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/update/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/update/100_synthetic_source.yml @@ -54,3 +54,63 @@ keyword: run_expensive_tasks: true - is_false: test.fields._source - is_true: test.fields._recovery_source + +--- +stored text: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + text: + type: text + store: true + text2: + type: text + store: true + + - do: + index: + index: test + id: 1 + refresh: true + body: + text: the quick brown fox + + - do: + update: + index: test + id: 1 + body: + doc_as_upsert: true + doc: + text2: jumped over the 
lazy dog + - match: {result: updated} + + - do: + get: + index: test + id: 1 + - match: {_index: "test"} + - match: {_id: "1"} + - match: {_version: 2} + - match: {found: true} + - match: + _source: + text: the quick brown fox + text2: jumped over the lazy dog + + # Make sure there isn't any _source stored field + - do: + indices.disk_usage: + index: test + run_expensive_tasks: true + - is_false: test.fields._source + - is_true: test.fields._recovery_source diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index df7d454ace53a..c1d3bd06c08ed 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -15,7 +15,6 @@ import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; -import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.XContentFieldFilter; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; @@ -34,9 +33,12 @@ import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; @@ -247,17 +249,16 @@ private GetResult innerGetFetch( Map metadataFields = null; BytesReference source = null; DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); - FieldsVisitor fieldVisitor = buildFieldsVisitors(storedFields, fetchSourceContext); + SourceLoader loader = forceSyntheticSource + ? 
new SourceLoader.Synthetic(mappingLookup.getMapping()) + : mappingLookup.newSourceLoader(); + FieldsVisitor fieldVisitor = buildFieldsVisitors(storedFields, fetchSourceContext, loader); if (fieldVisitor != null) { try { docIdAndVersion.reader.document(docIdAndVersion.docId, fieldVisitor); } catch (IOException e) { throw new ElasticsearchException("Failed to get id [" + id + "]", e); } - SourceLoader loader = forceSyntheticSource - ? new SourceLoader.Synthetic(mappingLookup.getMapping()) - : mappingLookup.newSourceLoader(); - source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId }).source(fieldVisitor, docIdAndVersion.docId); // put stored fields into result objects if (fieldVisitor.fields().isEmpty() == false) { @@ -272,6 +273,7 @@ private GetResult innerGetFetch( } } } + source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId }).source(fieldVisitor, docIdAndVersion.docId); } if (source != null) { @@ -301,11 +303,19 @@ private GetResult innerGetFetch( ); } - private static FieldsVisitor buildFieldsVisitors(String[] fields, FetchSourceContext fetchSourceContext) { - if (fields == null || fields.length == 0) { + private static FieldsVisitor buildFieldsVisitors(String[] fields, FetchSourceContext fetchSourceContext, SourceLoader loader) { + if (fields != null && fields.length > 0) { + Set fieldsToLoad = new HashSet<>(); + Collections.addAll(fieldsToLoad, fields); + if (fetchSourceContext.fetchSource()) { + fieldsToLoad.addAll(loader.requiredStoredFields()); + } + return new CustomFieldsVisitor(fieldsToLoad, fetchSourceContext.fetchSource()); + } + Set sourceFields = fetchSourceContext.fetchSource() ? loader.requiredStoredFields() : Set.of(); + if (sourceFields.isEmpty()) { return fetchSourceContext.fetchSource() ? 
new FieldsVisitor(true) : null; } - - return new CustomFieldsVisitor(Sets.newHashSet(fields), fetchSourceContext.fetchSource()); + return new CustomFieldsVisitor(sourceFields, fetchSourceContext.fetchSource()); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index 8f085195dae0c..11e9843ee243f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -466,7 +466,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) { + return new SortedNumericDocValuesSyntheticFieldLoader(name(), simpleName()) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(value == 1); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 22ef8c1bc3b20..c3728b8205025 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -915,7 +915,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) { + return new SortedNumericDocValuesSyntheticFieldLoader(name(), simpleName()) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(fieldType().format(value, fieldType().dateTimeFormatter())); diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java index 2951162275399..0143bf952e9dd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java @@ -497,7 +497,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new NumberFieldMapper.NumericSyntheticFieldLoader(name(), simpleName()) { + return new SortedNumericDocValuesSyntheticFieldLoader(name(), simpleName()) { final GeoPoint point = new GeoPoint(); @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index c39e02b1d7012..21d9c0fe8fbc8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -553,7 +553,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new KeywordFieldMapper.BytesSyntheticFieldLoader(name(), simpleName()) { + return new SortedSetDocValuesSyntheticFieldLoader(name(), simpleName()) { @Override protected BytesRef convert(BytesRef value) { byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 8c4f5649f7915..a1386624d4239 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ 
b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -15,15 +15,12 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.ReaderSlice; -import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -65,7 +62,6 @@ import org.elasticsearch.search.runtime.StringScriptFieldRegexpQuery; import org.elasticsearch.search.runtime.StringScriptFieldTermQuery; import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery; -import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.UncheckedIOException; @@ -1059,11 +1055,6 @@ protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleNa if (hasScript()) { return SourceLoader.SyntheticFieldLoader.NOTHING; } - if (hasDocValues == false) { - throw new IllegalArgumentException( - "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it doesn't have doc values" - ); - } if (fieldType().ignoreAbove() != Defaults.IGNORE_ABOVE) { throw new IllegalArgumentException( "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares ignore_above" @@ -1079,7 +1070,19 @@ protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleNa "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer" ); } - return new BytesSyntheticFieldLoader(name(), 
simpleName) { + if (fieldType.stored()) { + return new StringStoredFieldFieldLoader(name(), simpleName); + } + if (hasDocValues == false) { + throw new IllegalArgumentException( + "field [" + + name() + + "] of type [" + + typeName() + + "] doesn't support synthetic source because it doesn't have doc values and isn't stored" + ); + } + return new SortedSetDocValuesSyntheticFieldLoader(name(), simpleName) { @Override protected BytesRef convert(BytesRef value) { return value; @@ -1093,174 +1096,4 @@ protected BytesRef preserve(BytesRef value) { }; } - public abstract static class BytesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { - private final String name; - private final String simpleName; - - public BytesSyntheticFieldLoader(String name, String simpleName) { - this.name = name; - this.simpleName = simpleName; - } - - @Override - public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { - SortedSetDocValues dv = DocValues.getSortedSet(reader, name); - if (dv.getValueCount() == 0) { - return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF; - } - if (docIdsInLeaf.length == 1) { - /* - * The singleton optimization is mostly about looking up ordinals - * in sorted order and doesn't buy anything if there is only a single - * document. - */ - return new ImmediateLeaf(dv); - } - SortedDocValues singleton = DocValues.unwrapSingleton(dv); - if (singleton != null) { - return singletonLeaf(singleton, docIdsInLeaf); - } - return new ImmediateLeaf(dv); - } - - /** - * Load all ordinals for all docs up front and resolve to their string - * values in order. This should be much more disk-friendly than - * {@link ImmediateLeaf} because it resolves the ordinals in order and - * marginally more cpu friendly because it resolves the ordinals one time. 
- */ - private Leaf singletonLeaf(SortedDocValues singleton, int[] docIdsInLeaf) throws IOException { - int[] ords = new int[docIdsInLeaf.length]; - int found = 0; - for (int d = 0; d < docIdsInLeaf.length; d++) { - if (false == singleton.advanceExact(docIdsInLeaf[d])) { - ords[d] = -1; - continue; - } - ords[d] = singleton.ordValue(); - found++; - } - if (found == 0) { - return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF; - } - int[] sortedOrds = ords.clone(); - Arrays.sort(sortedOrds); - int unique = 0; - int prev = -1; - for (int ord : sortedOrds) { - if (ord != prev) { - prev = ord; - unique++; - } - } - int[] uniqueOrds = new int[unique]; - BytesRef[] converted = new BytesRef[unique]; - unique = 0; - prev = -1; - for (int ord : sortedOrds) { - if (ord != prev) { - prev = ord; - uniqueOrds[unique] = ord; - converted[unique] = preserve(convert(singleton.lookupOrd(ord))); - unique++; - } - } - logger.debug("loading [{}] on [{}] docs covering [{}] ords", name, docIdsInLeaf.length, uniqueOrds.length); - return new SourceLoader.SyntheticFieldLoader.Leaf() { - private int idx = -1; - - @Override - public boolean empty() { - return false; - } - - @Override - public boolean advanceToDoc(int docId) throws IOException { - idx++; - if (docIdsInLeaf[idx] != docId) { - throw new IllegalArgumentException( - "expected to be called with [" + docIdsInLeaf[idx] + "] but was called with " + docId + " instead" - ); - } - return ords[idx] >= 0; - } - - @Override - public void write(XContentBuilder b) throws IOException { - if (ords[idx] < 0) { - return; - } - int convertedIdx = Arrays.binarySearch(uniqueOrds, ords[idx]); - if (convertedIdx < 0) { - throw new IllegalStateException( - "received unexpected ord [" + ords[idx] + "]. 
Expected " + Arrays.toString(uniqueOrds) - ); - } - BytesRef c = converted[convertedIdx]; - b.field(simpleName).utf8Value(c.bytes, c.offset, c.length); - } - }; - } - - /** - * Load ordinals in line with populating the doc and immediately - * convert from ordinals into {@link BytesRef}s. - */ - private class ImmediateLeaf implements Leaf { - private final SortedSetDocValues dv; - private boolean hasValue; - - ImmediateLeaf(SortedSetDocValues dv) { - this.dv = dv; - } - - @Override - public boolean empty() { - return false; - } - - @Override - public boolean advanceToDoc(int docId) throws IOException { - return hasValue = dv.advanceExact(docId); - } - - @Override - public void write(XContentBuilder b) throws IOException { - if (false == hasValue) { - return; - } - long first = dv.nextOrd(); - long next = dv.nextOrd(); - if (next == SortedSetDocValues.NO_MORE_ORDS) { - BytesRef c = convert(dv.lookupOrd(first)); - b.field(simpleName).utf8Value(c.bytes, c.offset, c.length); - return; - } - b.startArray(simpleName); - BytesRef c = convert(dv.lookupOrd(first)); - b.utf8Value(c.bytes, c.offset, c.length); - c = convert(dv.lookupOrd(next)); - b.utf8Value(c.bytes, c.offset, c.length); - while ((next = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - c = convert(dv.lookupOrd(next)); - b.utf8Value(c.bytes, c.offset, c.length); - } - b.endArray(); - } - } - - /** - * Convert a {@link BytesRef} read from the source into bytes to write - * to the xcontent. This shouldn't make a deep copy if the conversion - * process itself doesn't require one. - */ - protected abstract BytesRef convert(BytesRef value); - - /** - * Preserves {@link BytesRef bytes} returned by {@link #convert} - * to by written later. This should make a - * {@link BytesRef#deepCopyOf deep copy} if {@link #convert} didn't. 
- */ - protected abstract BytesRef preserve(BytesRef value); - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 1210c23880a64..bc7ac13249876 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -14,11 +14,7 @@ import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.sandbox.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; @@ -396,7 +392,7 @@ private static void validateParsed(float value) { @Override SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) { - return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) { + return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(HalfFloatPoint.sortableShortToHalfFloat((short) value)); @@ -546,7 +542,7 @@ private static void validateParsed(float value) { @Override SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) { - return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) { + return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { 
b.value(NumericUtils.sortableIntToFloat((int) value)); @@ -674,7 +670,7 @@ private static void validateParsed(double value) { @Override SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName) { - return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) { + return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(NumericUtils.sortableLongToDouble(value)); @@ -1381,7 +1377,7 @@ public double reduceToStoredPrecision(double value) { abstract SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fieldName, String fieldSimpleName); private static SourceLoader.SyntheticFieldLoader syntheticLongFieldLoader(String fieldName, String fieldSimpleName) { - return new NumericSyntheticFieldLoader(fieldName, fieldSimpleName) { + return new SortedNumericDocValuesSyntheticFieldLoader(fieldName, fieldSimpleName) { @Override protected void writeValue(XContentBuilder b, long value) throws IOException { b.value(value); @@ -1754,140 +1750,4 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { return type.syntheticFieldLoader(name(), simpleName()); } - public abstract static class NumericSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { - private final String name; - private final String simpleName; - - protected NumericSyntheticFieldLoader(String name, String simpleName) { - this.name = name; - this.simpleName = simpleName; - } - - @Override - public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { - SortedNumericDocValues dv = docValuesOrNull(reader, name); - if (dv == null) { - return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF; - } - if (docIdsInLeaf.length > 1) { - /* - * The singleton optimization is mostly about looking up all - * values for the field at once. If there's just a single - * document then it's just extra overhead. 
- */ - NumericDocValues single = DocValues.unwrapSingleton(dv); - if (single != null) { - return singletonLeaf(single, docIdsInLeaf); - } - } - return new ImmediateLeaf(dv); - } - - private class ImmediateLeaf implements Leaf { - private final SortedNumericDocValues dv; - private boolean hasValue; - - ImmediateLeaf(SortedNumericDocValues dv) { - this.dv = dv; - } - - @Override - public boolean empty() { - return false; - } - - @Override - public boolean advanceToDoc(int docId) throws IOException { - return hasValue = dv.advanceExact(docId); - } - - @Override - public void write(XContentBuilder b) throws IOException { - if (false == hasValue) { - return; - } - if (dv.docValueCount() == 1) { - b.field(simpleName); - writeValue(b, dv.nextValue()); - return; - } - b.startArray(simpleName); - for (int i = 0; i < dv.docValueCount(); i++) { - writeValue(b, dv.nextValue()); - } - b.endArray(); - } - } - - /** - * Load all values for all docs up front. This should be much more - * disk and cpu-friendly than {@link ImmediateLeaf} because it resolves - * the values all at once, always scanning forwards on the disk. 
- */ - private Leaf singletonLeaf(NumericDocValues singleton, int[] docIdsInLeaf) throws IOException { - long[] values = new long[docIdsInLeaf.length]; - boolean[] hasValue = new boolean[docIdsInLeaf.length]; - boolean found = false; - for (int d = 0; d < docIdsInLeaf.length; d++) { - if (false == singleton.advanceExact(docIdsInLeaf[d])) { - hasValue[d] = false; - continue; - } - hasValue[d] = true; - values[d] = singleton.longValue(); - found = true; - } - if (found == false) { - return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF; - } - return new Leaf() { - private int idx = -1; - - @Override - public boolean empty() { - return false; - } - - @Override - public boolean advanceToDoc(int docId) throws IOException { - idx++; - if (docIdsInLeaf[idx] != docId) { - throw new IllegalArgumentException( - "expected to be called with [" + docIdsInLeaf[idx] + "] but was called with " + docId + " instead" - ); - } - return hasValue[idx]; - } - - @Override - public void write(XContentBuilder b) throws IOException { - if (hasValue[idx] == false) { - return; - } - b.field(simpleName); - writeValue(b, values[idx]); - } - }; - } - - /** - * Returns a {@link SortedNumericDocValues} or null if it doesn't have any doc values. - * See {@link DocValues#getSortedNumeric} which is *nearly* the same, but it returns - * an "empty" implementation if there aren't any doc values. We need to be able to - * tell if there aren't any and return our empty leaf source loader. 
- */ - public static SortedNumericDocValues docValuesOrNull(LeafReader reader, String fieldName) throws IOException { - SortedNumericDocValues dv = reader.getSortedNumericDocValues(fieldName); - if (dv != null) { - return dv; - } - NumericDocValues single = reader.getNumericDocValues(fieldName); - if (single != null) { - return DocValues.singleton(single); - } - return null; - } - - protected abstract void writeValue(XContentBuilder b, long value) throws IOException; - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index 636cd3628beeb..f391b0c9a7833 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReader; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; @@ -28,6 +29,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.stream.Stream; public class ObjectMapper extends Mapper implements Cloneable { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(ObjectMapper.class); @@ -567,52 +569,68 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep @Override public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - List fields = mappers.values() - .stream() - .sorted(Comparator.comparing(Mapper::name)) - .map(Mapper::syntheticFieldLoader) - .filter(l -> l != null) - .toList(); - return (reader, docIdsInLeaf) -> { - List l = new ArrayList<>(); - for (SourceLoader.SyntheticFieldLoader field : fields) { - SourceLoader.SyntheticFieldLoader.Leaf leaf = field.leaf(reader, docIdsInLeaf); - if (false == leaf.empty()) { - l.add(leaf); - } - } - 
SourceLoader.SyntheticFieldLoader.Leaf[] leaves = l.toArray(SourceLoader.SyntheticFieldLoader.Leaf[]::new); - return new SourceLoader.SyntheticFieldLoader.Leaf() { - private boolean hasValue; + return new SyntheticSourceFieldLoader( + mappers.values() + .stream() + .sorted(Comparator.comparing(Mapper::name)) + .map(Mapper::syntheticFieldLoader) + .filter(l -> l != null) + .toList() + ); + } - @Override - public boolean empty() { - return leaves.length == 0; - } + private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader { + private final List fields; + private boolean hasValue; - @Override - public boolean advanceToDoc(int docId) throws IOException { - hasValue = false; - for (SourceLoader.SyntheticFieldLoader.Leaf leaf : leaves) { - boolean leafHasValue = leaf.advanceToDoc(docId); - hasValue |= leafHasValue; - } - return hasValue; - } + private SyntheticSourceFieldLoader(List fields) { + this.fields = fields; + } - @Override - public void write(XContentBuilder b) throws IOException { - if (hasValue == false) { - return; - } - startSyntheticField(b); - for (SourceLoader.SyntheticFieldLoader.Leaf leaf : leaves) { - leaf.write(b); - } - b.endObject(); + @Override + public Stream> storedFieldLoaders() { + return fields.stream().flatMap(SourceLoader.SyntheticFieldLoader::storedFieldLoaders).map(e -> Map.entry(e.getKey(), values -> { + hasValue = true; + e.getValue().load(values); + })); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + List loaders = new ArrayList<>(); + for (SourceLoader.SyntheticFieldLoader field : fields) { + SourceLoader.SyntheticFieldLoader.DocValuesLoader loader = field.docValuesLoader(leafReader, docIdsInLeaf); + if (loader != null) { + loaders.add(loader); } + } + return docId -> { + for (SourceLoader.SyntheticFieldLoader.DocValuesLoader docValueLoader : loaders) { + boolean leafHasValue = docValueLoader.advanceToDoc(docId); + hasValue |= 
leafHasValue; + } + /* + * Important and kind of sneaky note: this will return true + * if there were any values loaded from stored fields as + * well. That *is* how we "wake up" objects that contain just + * stored field. + */ + return hasValue; }; - }; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (hasValue == false) { + return; + } + startSyntheticField(b); + for (SourceLoader.SyntheticFieldLoader field : fields) { + field.write(b); + } + b.endObject(); + hasValue = false; + } } protected void startSyntheticField(XContentBuilder b) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java new file mode 100644 index 0000000000000..0ad69a0498a6c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedNumericDocValuesSyntheticFieldLoader.java @@ -0,0 +1,202 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Map; +import java.util.stream.Stream; + +/** + * Load {@code _source} fields from {@link SortedNumericDocValues}. 
+ */ +public abstract class SortedNumericDocValuesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { + private final String name; + private final String simpleName; + private Values values = NO_VALUES; + + protected SortedNumericDocValuesSyntheticFieldLoader(String name, String simpleName) { + this.name = name; + this.simpleName = simpleName; + } + + protected abstract void writeValue(XContentBuilder b, long value) throws IOException; + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { + SortedNumericDocValues dv = docValuesOrNull(reader, name); + if (dv == null) { + values = NO_VALUES; + return null; + } + if (docIdsInLeaf.length > 1) { + /* + * The singleton optimization is mostly about looking up all + * values for the field at once. If there's just a single + * document then it's just extra overhead. + */ + NumericDocValues single = DocValues.unwrapSingleton(dv); + if (single != null) { + SingletonDocValuesLoader loader = buildSingletonDocValuesLoader(single, docIdsInLeaf); + values = loader == null ? 
NO_VALUES : loader; + return loader; + } + } + ImmediateDocValuesLoader loader = new ImmediateDocValuesLoader(dv); + values = loader; + return loader; + } + + @Override + public void write(XContentBuilder b) throws IOException { + switch (values.count()) { + case 0: + return; + case 1: + b.field(simpleName); + values.write(b); + return; + default: + b.startArray(simpleName); + values.write(b); + b.endArray(); + return; + } + } + + private interface Values { + int count(); + + void write(XContentBuilder b) throws IOException; + } + + private static final Values NO_VALUES = new Values() { + @Override + public int count() { + return 0; + } + + @Override + public void write(XContentBuilder b) throws IOException {} + }; + + private class ImmediateDocValuesLoader implements DocValuesLoader, Values { + private final SortedNumericDocValues dv; + private boolean hasValue; + + ImmediateDocValuesLoader(SortedNumericDocValues dv) { + this.dv = dv; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + return hasValue = dv.advanceExact(docId); + } + + @Override + public int count() { + return hasValue ? dv.docValueCount() : 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + for (int i = 0; i < dv.docValueCount(); i++) { + writeValue(b, dv.nextValue()); + } + } + } + + private SingletonDocValuesLoader buildSingletonDocValuesLoader(NumericDocValues singleton, int[] docIdsInLeaf) throws IOException { + long[] values = new long[docIdsInLeaf.length]; + boolean[] hasValue = new boolean[docIdsInLeaf.length]; + boolean found = false; + for (int d = 0; d < docIdsInLeaf.length; d++) { + if (false == singleton.advanceExact(docIdsInLeaf[d])) { + hasValue[d] = false; + continue; + } + hasValue[d] = true; + values[d] = singleton.longValue(); + found = true; + } + if (found == false) { + return null; + } + return new SingletonDocValuesLoader(docIdsInLeaf, values, hasValue); + } + + /** + * Load all values for all docs up front. 
This should be much more + * disk and cpu-friendly than {@link ImmediateDocValuesLoader} because + * it resolves the values all at once, always scanning forwards on + * the disk. + */ + private class SingletonDocValuesLoader implements DocValuesLoader, Values { + private final int[] docIdsInLeaf; + private final long[] values; + private final boolean[] hasValue; + private int idx = -1; + + private SingletonDocValuesLoader(int[] docIdsInLeaf, long[] values, boolean[] hasValue) { + this.docIdsInLeaf = docIdsInLeaf; + this.values = values; + this.hasValue = hasValue; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + idx++; + if (docIdsInLeaf[idx] != docId) { + throw new IllegalArgumentException( + "expected to be called with [" + docIdsInLeaf[idx] + "] but was called with " + docId + " instead" + ); + } + return hasValue[idx]; + } + + @Override + public int count() { + return hasValue[idx] ? 1 : 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + assert hasValue[idx]; + writeValue(b, values[idx]); + } + } + + /** + * Returns a {@link SortedNumericDocValues} or null if it doesn't have any doc values. + * See {@link DocValues#getSortedNumeric} which is *nearly* the same, but it returns + * an "empty" implementation if there aren't any doc values. We need to be able to + * tell if there aren't any and return our empty leaf source loader. 
+ */ + public static SortedNumericDocValues docValuesOrNull(LeafReader reader, String fieldName) throws IOException { + SortedNumericDocValues dv = reader.getSortedNumericDocValues(fieldName); + if (dv != null) { + return dv; + } + NumericDocValues single = reader.getNumericDocValues(fieldName); + if (single != null) { + return DocValues.singleton(single); + } + return null; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesSyntheticFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesSyntheticFieldLoader.java new file mode 100644 index 0000000000000..321d1620c41b6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesSyntheticFieldLoader.java @@ -0,0 +1,237 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Stream; + +/** + * Load {@code _source} fields from {@link SortedSetDocValues}. 
+ */ +public abstract class SortedSetDocValuesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { + private static final Logger logger = LogManager.getLogger(SortedSetDocValuesSyntheticFieldLoader.class); + + private final String name; + private final String simpleName; + private Values values = NO_VALUES; + + public SortedSetDocValuesSyntheticFieldLoader(String name, String simpleName) { + this.name = name; + this.simpleName = simpleName; + } + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { + SortedSetDocValues dv = DocValues.getSortedSet(reader, name); + if (dv.getValueCount() == 0) { + values = NO_VALUES; + return null; + } + if (docIdsInLeaf.length > 1) { + /* + * The singleton optimization is mostly about looking up ordinals + * in sorted order and doesn't buy anything if there is only a single + * document. + */ + SortedDocValues singleton = DocValues.unwrapSingleton(dv); + if (singleton != null) { + SingletonDocValuesLoader loader = buildSingletonDocValuesLoader(singleton, docIdsInLeaf); + values = loader == null ? 
NO_VALUES : loader; + return loader; + } + } + ImmediateDocValuesLoader loader = new ImmediateDocValuesLoader(dv); + values = loader; + return loader; + } + + @Override + public void write(XContentBuilder b) throws IOException { + switch (values.count()) { + case 0: + return; + case 1: + b.field(simpleName); + values.write(b); + return; + default: + b.startArray(simpleName); + values.write(b); + b.endArray(); + return; + } + } + + private interface Values { + int count(); + + void write(XContentBuilder b) throws IOException; + } + + private static final Values NO_VALUES = new Values() { + @Override + public int count() { + return 0; + } + + @Override + public void write(XContentBuilder b) throws IOException {} + }; + + /** + * Load ordinals in line with populating the doc and immediately + * convert from ordinals into {@link BytesRef}s. + */ + private class ImmediateDocValuesLoader implements DocValuesLoader, Values { + private final SortedSetDocValues dv; + private boolean hasValue; + + ImmediateDocValuesLoader(SortedSetDocValues dv) { + this.dv = dv; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + return hasValue = dv.advanceExact(docId); + } + + @Override + public int count() { + return hasValue ? dv.docValueCount() : 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + assert hasValue; + for (int i = 0; i < dv.docValueCount(); i++) { + BytesRef c = convert(dv.lookupOrd(dv.nextOrd())); + b.utf8Value(c.bytes, c.offset, c.length); + } + } + } + + /** + * Load all ordinals for all docs up front and resolve to their string + * values in order. This should be much more disk-friendly than + * {@link ImmediateDocValuesLoader} because it resolves the ordinals in order and + * marginally more cpu friendly because it resolves the ordinals one time. 
+ */ + private SingletonDocValuesLoader buildSingletonDocValuesLoader(SortedDocValues singleton, int[] docIdsInLeaf) throws IOException { + int[] ords = new int[docIdsInLeaf.length]; + int found = 0; + for (int d = 0; d < docIdsInLeaf.length; d++) { + if (false == singleton.advanceExact(docIdsInLeaf[d])) { + ords[d] = -1; + continue; + } + ords[d] = singleton.ordValue(); + found++; + } + if (found == 0) { + return null; + } + int[] sortedOrds = ords.clone(); + Arrays.sort(sortedOrds); + int unique = 0; + int prev = -1; + for (int ord : sortedOrds) { + if (ord != prev) { + prev = ord; + unique++; + } + } + int[] uniqueOrds = new int[unique]; + BytesRef[] converted = new BytesRef[unique]; + unique = 0; + prev = -1; + for (int ord : sortedOrds) { + if (ord != prev) { + prev = ord; + uniqueOrds[unique] = ord; + converted[unique] = preserve(convert(singleton.lookupOrd(ord))); + unique++; + } + } + logger.debug("loading [{}] on [{}] docs covering [{}] ords", name, docIdsInLeaf.length, uniqueOrds.length); + return new SingletonDocValuesLoader(docIdsInLeaf, ords, uniqueOrds, converted); + } + + private class SingletonDocValuesLoader implements DocValuesLoader, Values { + private final int[] docIdsInLeaf; + private final int[] ords; + private final int[] uniqueOrds; + private final BytesRef[] converted; + + private int idx = -1; + + private SingletonDocValuesLoader(int[] docIdsInLeaf, int[] ords, int[] uniqueOrds, BytesRef[] converted) { + this.docIdsInLeaf = docIdsInLeaf; + this.ords = ords; + this.uniqueOrds = uniqueOrds; + this.converted = converted; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + idx++; + if (docIdsInLeaf[idx] != docId) { + throw new IllegalArgumentException( + "expected to be called with [" + docIdsInLeaf[idx] + "] but was called with " + docId + " instead" + ); + } + return ords[idx] >= 0; + } + + @Override + public int count() { + return ords[idx] < 0 ? 
0 : 1; + } + + @Override + public void write(XContentBuilder b) throws IOException { + assert ords[idx] >= 0; + int convertedIdx = Arrays.binarySearch(uniqueOrds, ords[idx]); + if (convertedIdx < 0) { + throw new IllegalStateException("received unexpected ord [" + ords[idx] + "]. Expected " + Arrays.toString(uniqueOrds)); + } + BytesRef c = converted[convertedIdx]; + b.utf8Value(c.bytes, c.offset, c.length); + } + } + + /** + * Convert a {@link BytesRef} read from the source into bytes to write + * to the xcontent. This shouldn't make a deep copy if the conversion + * process itself doesn't require one. + */ + protected abstract BytesRef convert(BytesRef value); + + /** + * Preserves {@link BytesRef bytes} returned by {@link #convert} + * to be written later. This should make a + * {@link BytesRef#deepCopyOf deep copy} if {@link #convert} didn't. + */ + protected abstract BytesRef preserve(BytesRef value); +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java index 351608c9a20cf..70111f1e48ffe 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java @@ -16,6 +16,11 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Loads source {@code _source} during a GET or {@code _search}. @@ -31,6 +36,12 @@ public interface SourceLoader { */ Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException; + /** + * Set containing the names of all non-{@code _source} stored fields required + * to build the {@code _source}. + */ + Set requiredStoredFields(); + /** * Loads {@code _source} from some segment. 
*/ @@ -64,6 +75,11 @@ public boolean reordersFieldValues() { public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) { return (fieldsVisitor, docId) -> fieldsVisitor.source(); } + + @Override + public Set requiredStoredFields() { + return Set.of(); + } }; /** @@ -71,9 +87,11 @@ public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) { */ class Synthetic implements SourceLoader { private final SyntheticFieldLoader loader; + private final Map storedFieldLoaders; public Synthetic(Mapping mapping) { loader = mapping.getRoot().syntheticFieldLoader(); + storedFieldLoaders = Map.copyOf(loader.storedFieldLoaders().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } @Override @@ -81,17 +99,27 @@ public boolean reordersFieldValues() { return true; } + @Override + public Set requiredStoredFields() { + return storedFieldLoaders.keySet(); + } + @Override public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { - SyntheticFieldLoader.Leaf leaf = loader.leaf(reader, docIdsInLeaf); - if (leaf.empty()) { - return Leaf.EMPTY_OBJECT; - } + SyntheticFieldLoader.DocValuesLoader leaf = loader.docValuesLoader(reader, docIdsInLeaf); return (fieldsVisitor, docId) -> { + if (fieldsVisitor != null) { + for (Map.Entry> e : fieldsVisitor.fields().entrySet()) { + SyntheticFieldLoader.StoredFieldLoader loader = storedFieldLoaders.get(e.getKey()); + if (loader != null) { + loader.load(e.getValue()); + } + } + } // TODO accept a requested xcontent type try (XContentBuilder b = new XContentBuilder(JsonXContent.jsonXContent, new ByteArrayOutputStream())) { if (leaf.advanceToDoc(docId)) { - leaf.write(b); + loader.write(b); } else { b.startObject().endObject(); } @@ -103,54 +131,89 @@ public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { /** * Load a field for {@link Synthetic}. + *

+ * {@link SyntheticFieldLoader}s load values through objects vended + * by their {@link #storedFieldLoaders} and {@link #docValuesLoader} + * methods. Then you call {@link #write} to write the values to an + * {@link XContentBuilder} which also clears them. + *

+ * This two-loader, one-writer setup is specifically designed to + * efficiently load the {@code _source} of indices that have thousands + * of fields declared in the mapping but that only have values for + * dozens of them. It handles this in a few ways: + *

    + *
  • {@link #docValuesLoader} must be called once per document + * per field to load the doc values, but detects up front if + * there are no doc values for that field. It's linear with + * the number of fields, whether or not they have values, + * but skips entirely missing fields.
  • + *
  • {@link #storedFieldLoaders} are only called when the + * document contains a stored field with the appropriate name. + * So it's fine to have thousands of these declared in the + * mapping and you don't really pay much to load them. Just + * the cost to build the {@link Map} used to address them.
  • + *
  • Object fields that don't have any values loaded by either + * means bail out of the loading process and don't pass + * control down to any of their children. Thus it's fine + * to declare huge object structures in the mapping and + * you only spend time iterating the ones you need, or the + * ones that have doc values.
  • + *
*/ interface SyntheticFieldLoader { /** * Load no values. */ - SyntheticFieldLoader.Leaf NOTHING_LEAF = new Leaf() { + SyntheticFieldLoader NOTHING = new SyntheticFieldLoader() { @Override - public boolean empty() { - return true; + public Stream> storedFieldLoaders() { + return Stream.of(); } @Override - public boolean advanceToDoc(int docId) throws IOException { - return false; + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + return null; } @Override - public void write(XContentBuilder b) throws IOException {} + public void write(XContentBuilder b) {} }; /** - * Load no values. + * A {@link Stream} mapping stored field paths to a place to put them + * so they can be included in the next document. */ - SyntheticFieldLoader NOTHING = (r, docIds) -> NOTHING_LEAF; + Stream> storedFieldLoaders(); /** - * Build a loader for this field in the provided segment. + * Build something to load doc values for this field or return + * {@code null} if there are no doc values for this field to + * load. */ - Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException; + DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException; /** - * Loads values for a field in a particular leaf. + * Write values for this document. */ - interface Leaf { - /** - * Is this entirely empty? - */ - boolean empty(); + void write(XContentBuilder b) throws IOException; - /** - * Position the loader at a document. - */ - boolean advanceToDoc(int docId) throws IOException; + /** + * Sink for stored field values. + */ + interface StoredFieldLoader { + void load(List values); + } + /** + * Loads doc values for a field. + */ + interface DocValuesLoader { /** - * Write values for this document. + * Load the doc values for this field. 
+ * + * @return whether or not there are any values for this field */ - void write(XContentBuilder b) throws IOException; + boolean advanceToDoc(int docId) throws IOException; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringStoredFieldFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/StringStoredFieldFieldLoader.java new file mode 100644 index 0000000000000..0cd65d0ee3059 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/StringStoredFieldFieldLoader.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.LeafReader; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +public class StringStoredFieldFieldLoader + implements + SourceLoader.SyntheticFieldLoader, + SourceLoader.SyntheticFieldLoader.StoredFieldLoader { + private final String name; + private final String simpleName; + private List values; + + public StringStoredFieldFieldLoader(String name, String simpleName) { + this.name = name; + this.simpleName = simpleName; + } + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(Map.entry(name, this)); + } + + @Override + public void load(List values) { + this.values = values; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (values == null || values.isEmpty()) { + return; + } + if (values.size() == 1) { + b.field(simpleName, values.get(0).toString()); + values = null; + return; + } + b.startArray(simpleName); + for (Object value : 
values) { + b.value(value.toString()); + } + b.endArray(); + values = null; + } + + @Override + public final DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { + return null; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 852d03bcfafff..721f0bbc7ab61 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -1272,12 +1272,15 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } + if (store) { + return new StringStoredFieldFieldLoader(name(), simpleName()); + } for (Mapper sub : this) { if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) { KeywordFieldMapper kwd = (KeywordFieldMapper) sub; - if (kwd.fieldType().hasDocValues() - && kwd.hasNormalizer() == false - && kwd.fieldType().ignoreAbove() == KeywordFieldMapper.Defaults.IGNORE_ABOVE) { + if (kwd.hasNormalizer() == false + && kwd.fieldType().ignoreAbove() == KeywordFieldMapper.Defaults.IGNORE_ABOVE + && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) { return kwd.syntheticFieldLoader(simpleName()); } @@ -1286,8 +1289,8 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { throw new IllegalArgumentException( String.format( Locale.ROOT, - "field [%s] of type [%s] doesn't support synthetic source unless it has a sub-field of" - + " type [keyword] with doc values enabled and without ignore_above or a normalizer", + "field [%s] of type [%s] doesn't support synthetic source unless it is stored or has a sub-field of" + + " type [keyword] with doc values or stored and without ignore_above or a normalizer", name(), typeName() ) diff --git 
a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 79492167596d3..6b2fddd7fa0ee 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -109,8 +109,9 @@ private SearchHits buildSearchHits(SearchContext context, Profiler profiler) { // make sure that we iterate in doc id order Arrays.sort(docs); + SourceLoader sourceLoader = context.newSourceLoader(); Map> storedToRequestedFields = new HashMap<>(); - FieldsVisitor fieldsVisitor = createStoredFieldsVisitor(context, storedToRequestedFields); + FieldsVisitor fieldsVisitor = createStoredFieldsVisitor(context, storedToRequestedFields, sourceLoader); profiler.visitor(fieldsVisitor); FetchContext fetchContext = new FetchContext(context); @@ -241,7 +242,11 @@ public int compareTo(DocIdToIndex o) { } } - private static FieldsVisitor createStoredFieldsVisitor(SearchContext context, Map> storedToRequestedFields) { + private static FieldsVisitor createStoredFieldsVisitor( + SearchContext context, + Map> storedToRequestedFields, + SourceLoader sourceLoader + ) { StoredFieldsContext storedFieldsContext = context.storedFieldsContext(); if (storedFieldsContext == null) { @@ -250,6 +255,11 @@ private static FieldsVisitor createStoredFieldsVisitor(SearchContext context, Ma context.fetchSourceContext(FetchSourceContext.FETCH_SOURCE); } boolean loadSource = sourceRequired(context); + if (loadSource) { + if (false == sourceLoader.requiredStoredFields().isEmpty()) { + return new CustomFieldsVisitor(sourceLoader.requiredStoredFields(), true); + } + } return new FieldsVisitor(loadSource); } else if (storedFieldsContext.fetchFields() == false) { // disable stored fields entirely @@ -273,6 +283,9 @@ private static FieldsVisitor createStoredFieldsVisitor(SearchContext context, Ma } } boolean loadSource = sourceRequired(context); + if (loadSource) { 
+ sourceLoader.requiredStoredFields().forEach(fieldName -> storedToRequestedFields.putIfAbsent(fieldName, Set.of())); + } if (storedToRequestedFields.isEmpty()) { // empty list specified, default to disable _source if no explicit indication return new FieldsVisitor(loadSource); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index d7c8fb8314b04..1df397088d027 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -623,11 +623,17 @@ public void testKeywordFieldUtf8LongerThan32766() throws Exception { @Override protected SyntheticSourceSupport syntheticSourceSupport() { - return new KeywordSyntheticSourceSupport(); + return new KeywordSyntheticSourceSupport(randomBoolean(), usually() ? null : randomAlphaOfLength(2)); } static class KeywordSyntheticSourceSupport implements SyntheticSourceSupport { - private final String nullValue = usually() ? null : randomAlphaOfLength(2); + private final boolean store; + private final String nullValue; + + KeywordSyntheticSourceSupport(boolean store, String nullValue) { + this.store = store; + this.nullValue = nullValue; + } @Override public SyntheticSourceExample example(int maxValues) { @@ -637,7 +643,9 @@ public SyntheticSourceExample example(int maxValues) { } List> values = randomList(1, maxValues, this::generateValue); List in = values.stream().map(Tuple::v1).toList(); - List outList = values.stream().map(Tuple::v2).collect(Collectors.toSet()).stream().sorted().toList(); + List outList = store + ? values.stream().map(Tuple::v2).toList() + : values.stream().map(Tuple::v2).collect(Collectors.toSet()).stream().sorted().toList(); Object out = outList.size() == 1 ? 
outList.get(0) : outList; return new SyntheticSourceExample(in, out, this::mapping); } @@ -655,13 +663,22 @@ private void mapping(XContentBuilder b) throws IOException { if (nullValue != null) { b.field("null_value", nullValue); } + if (store) { + b.field("store", true); + if (randomBoolean()) { + b.field("doc_values", false); + } + } } @Override public List invalidExample() throws IOException { return List.of( new SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it doesn't have doc values"), + equalTo( + "field [field] of type [keyword] doesn't support synthetic source because " + + "it doesn't have doc values and isn't stored" + ), b -> b.field("type", "keyword").field("doc_values", false) ), new SyntheticSourceInvalidExample( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/SourceLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/SourceLoaderTests.java index b33df2b94c370..70ab6a1e6c998 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/SourceLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/SourceLoaderTests.java @@ -41,8 +41,8 @@ public void testUnsupported() throws IOException { assertThat( e.getMessage(), equalTo( - "field [txt] of type [text] doesn't support synthetic source unless" - + " it has a sub-field of type [keyword] with doc values enabled and without ignore_above or a normalizer" + "field [txt] of type [text] doesn't support synthetic source unless it is stored or has a sub-field " + + "of type [keyword] with doc values or stored and without ignore_above or a normalizer" ) ); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 3950e64a1fc60..5d0cf572ffdd9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -1095,11 +1095,21 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException @Override protected SyntheticSourceSupport syntheticSourceSupport() { - SyntheticSourceSupport supportDelegate = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(); + boolean storeTextField = randomBoolean(); + boolean storedKeywordField = storeTextField || randomBoolean(); + String nullValue = storeTextField || usually() ? null : randomAlphaOfLength(2); return new SyntheticSourceSupport() { @Override - public SyntheticSourceExample example(int maxValues) throws IOException { - SyntheticSourceExample delegate = supportDelegate.example(maxValues); + public SyntheticSourceExample example(int maxValues) { + SyntheticSourceExample delegate = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(storedKeywordField, nullValue) + .example(maxValues); + if (storeTextField) { + return new SyntheticSourceExample( + delegate.inputValue(), + delegate.result(), + b -> b.field("type", "text").field("store", true) + ); + } return new SyntheticSourceExample(delegate.inputValue(), delegate.result(), b -> { b.field("type", "text"); b.startObject("fields"); @@ -1115,8 +1125,8 @@ public SyntheticSourceExample example(int maxValues) throws IOException { @Override public List invalidExample() throws IOException { Matcher err = equalTo( - "field [field] of type [text] doesn't support synthetic source " - + "unless it has a sub-field of type [keyword] with doc values enabled and without ignore_above or a normalizer" + "field [field] of type [text] doesn't support synthetic source unless it is stored or" + + " has a sub-field of type [keyword] with doc values or stored and without ignore_above or a normalizer" ); return List.of( new SyntheticSourceInvalidExample(err, TextFieldMapperTests.this::minimalMapping), diff --git 
a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 45d55fa86f57d..84df3ae8d076c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -41,6 +42,8 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor; +import org.elasticsearch.index.fieldvisitor.FieldsVisitor; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.support.NestedScope; @@ -658,8 +661,7 @@ protected final String syntheticSource(DocumentMapper mapper, CheckedConsumer metrics; + private final Map metricDocValues = new EnumMap<>(Metric.class); + private final Set metricHasValue = EnumSet.noneOf(Metric.class); protected AggregateMetricSyntheticFieldLoader(String name, String simpleName, EnumSet metrics) { this.name = name; @@ -699,37 +703,49 @@ protected AggregateMetricSyntheticFieldLoader(String name, String simpleName, En } @Override - public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { - Map metricDocValues = new EnumMap<>(Metric.class); + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws 
IOException { + metricDocValues.clear(); for (Metric m : metrics) { String fieldName = subfieldName(name, m); - SortedNumericDocValues dv = NumberFieldMapper.NumericSyntheticFieldLoader.docValuesOrNull(reader, fieldName); + SortedNumericDocValues dv = SortedNumericDocValuesSyntheticFieldLoader.docValuesOrNull(reader, fieldName); if (dv != null) { metricDocValues.put(m, dv); } } if (metricDocValues.isEmpty()) { - return SourceLoader.SyntheticFieldLoader.NOTHING_LEAF; + return null; } - return new AggregateMetricSyntheticFieldLoader.ImmediateLeaf(metricDocValues); + return new AggregateDocValuesLoader(); } - private class ImmediateLeaf implements Leaf { - private final Map metricDocValues; - private final Set metricHasValue = EnumSet.noneOf(Metric.class); - - ImmediateLeaf(Map metricDocValues) { - assert metricDocValues.isEmpty() == false : "doc_values for metrics cannot be empty"; - this.metricDocValues = metricDocValues; + @Override + public void write(XContentBuilder b) throws IOException { + if (metricHasValue.isEmpty()) { + return; } - - @Override - public boolean empty() { - return false; + b.startObject(simpleName); + for (Map.Entry entry : metricDocValues.entrySet()) { + if (metricHasValue.contains(entry.getKey())) { + String metricName = entry.getKey().name(); + long value = entry.getValue().nextValue(); + if (entry.getKey() == Metric.value_count) { + b.field(metricName, value); + } else { + b.field(metricName, NumericUtils.sortableLongToDouble(value)); + } + } } + b.endObject(); + } + private class AggregateDocValuesLoader implements DocValuesLoader { @Override public boolean advanceToDoc(int docId) throws IOException { // It is required that all defined metrics must exist. 
In this case @@ -745,26 +761,6 @@ public boolean advanceToDoc(int docId) throws IOException { return metricHasValue.isEmpty() == false; } - - @Override - public void write(XContentBuilder b) throws IOException { - if (metricHasValue.isEmpty()) { - return; - } - b.startObject(simpleName); - for (Map.Entry entry : metricDocValues.entrySet()) { - if (metricHasValue.contains(entry.getKey())) { - String metricName = entry.getKey().name(); - long value = entry.getValue().nextValue(); - if (entry.getKey() == Metric.value_count) { - b.field(metricName, value); - } else { - b.field(metricName, NumericUtils.sortableLongToDouble(value)); - } - } - } - b.endObject(); - } } } } diff --git a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java index da1cdd9e78a33..a070b828aa32b 100644 --- a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java +++ b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.constantkeyword.mapper; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -51,6 +52,7 @@ import java.util.Locale; import java.util.Map; import java.util.Objects; +import java.util.stream.Stream; /** * A {@link FieldMapper} that assigns every document the same value. 
@@ -309,15 +311,25 @@ protected String contentType() { @Override public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - return (reader, docIdsInLeaf) -> new SourceLoader.SyntheticFieldLoader.Leaf() { + return new SourceLoader.SyntheticFieldLoader() { @Override - public boolean empty() { - return fieldType().value == null; + public Stream> storedFieldLoaders() { + return Stream.of(); } @Override - public boolean advanceToDoc(int docId) throws IOException { - return fieldType().value != null; + public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) { + /* + * If there is a value we need to enable objects containing these + * fields. We could build something special for fields that are + * always "on", but constant_keyword fields are rare enough that + * having an extra doc values loader that always returns `true` + * isn't a big performance hit and gets the job done. + */ + if (fieldType().value == null) { + return null; + } + return docId -> true; } @Override