diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index a1d638616f2aa..5e5814528fcd2 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -34,6 +34,7 @@ import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.FieldMapper; import org.opensearch.index.mapper.FieldValueConverter; @@ -193,7 +194,9 @@ public List getMetricReaders(SegmentWriteState stat metricFieldInfo = getFieldInfo(metric.getField(), DocValuesType.SORTED_NUMERIC); } metricReader = new SequentialDocValuesIterator( - fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) + new SortedNumericStarTreeValuesIterator( + fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) + ) ); } metricReaders.add(metricReader); @@ -228,7 +231,7 @@ public void build( dimensionFieldInfo = getFieldInfo(dimension, DocValuesType.SORTED_NUMERIC); } dimensionReaders[i] = new SequentialDocValuesIterator( - fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) + new SortedNumericStarTreeValuesIterator(fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo)) ); } Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); @@ -287,7 +290,7 @@ void appendDocumentsToStarTree(Iterator starTreeDocumentIterat } } - private void serializeStarTree(int numSegmentStarTreeDocument, int numStarTreeDocs) throws IOException { + private void serializeStarTree(int numSegmentStarTreeDocuments, int numStarTreeDocs) throws IOException { // serialize the star tree data long dataFilePointer = dataOut.getFilePointer(); StarTreeWriter starTreeWriter = new StarTreeWriter(); @@ -299,7 +302,7 @@ private void serializeStarTree(int numSegmentStarTreeDocument, int numStarTreeDo starTreeField, metricAggregatorInfos, numStarTreeNodes, - numSegmentStarTreeDocument, + numSegmentStarTreeDocuments, numStarTreeDocs, dataFilePointer, totalStarTreeDataLength @@ -400,22 +403,20 @@ protected StarTreeDocument getStarTreeDocument( ) throws IOException { Long[] dims = new Long[numDimensions]; int i = 0; - for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { - dimensionDocValueIterator.nextDoc(currentDocId); - Long val = dimensionDocValueIterator.value(currentDocId); + for (SequentialDocValuesIterator dimensionValueIterator : dimensionReaders) { + dimensionValueIterator.nextEntry(currentDocId); + Long val = dimensionValueIterator.value(currentDocId); dims[i] = val; i++; } i = 0; Object[] metrics = new Object[metricReaders.size()]; - for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { - metricDocValuesIterator.nextDoc(currentDocId); + for (SequentialDocValuesIterator metricValuesIterator : metricReaders) { + metricValuesIterator.nextEntry(currentDocId); // As part of merge, we traverse the star tree doc values // The type of data stored in metric fields is different from the // actual indexing field they're based on - metrics[i] = metricAggregatorInfos.get(i) - .getValueAggregators() - .toAggregatedValueType(metricDocValuesIterator.value(currentDocId)); + metrics[i] = metricAggregatorInfos.get(i).getValueAggregators().toAggregatedValueType(metricValuesIterator.value(currentDocId)); i++; } return new StarTreeDocument(dims, metrics); @@ -502,7 +503,7 @@ Long[] getStarTreeDimensionsFromSegment(int currentDocId, SequentialDocValuesIte for (int i = 0; i < numDimensions; i++) { if (dimensionReaders[i] != null) { try { - dimensionReaders[i].nextDoc(currentDocId); + dimensionReaders[i].nextEntry(currentDocId); } catch (IOException e) { logger.error("unable to iterate to next doc", e); throw new RuntimeException("unable to iterate to next doc", e); @@ -530,7 +531,7 @@ private Object[] getStarTreeMetricsFromSegment(int currentDocId, List mergeStarTrees(List starTreeValuesSub .size()]; for (int i = 0; i < dimensionsSplitOrder.size(); i++) { String dimension = dimensionsSplitOrder.get(i).getField(); - dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionValuesIterator(dimension)); } List metricReaders = new ArrayList<>(); // get doc id set iterators for metrics @@ -164,7 +164,7 @@ Iterator mergeStarTrees(List starTreeValuesSub metric.getField(), metricStat.getTypeName() ); - metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricValuesIterator(metricFullName))); } } int currentDocId = 0; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 1a5c906ad413b..13c6d03c4dc3d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -138,7 +138,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal for (int i = 0; i < dimensionsSplitOrder.size(); i++) { String dimension = dimensionsSplitOrder.get(i).getField(); - dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionValuesIterator(dimension)); } List metricReaders = new ArrayList<>(); @@ -150,7 +150,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal metric.getField(), metricStat.getTypeName() ); - metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricValuesIterator(metricFullName))); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java index 779ed77b0540a..15ed153249243 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java @@ -21,7 +21,6 @@ import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -55,11 +54,9 @@ public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implem private RandomAccessInput starTreeDocsFileRandomInput; private IndexOutput starTreeDocsFileOutput; private final Map fileToEndDocIdMap; - private final List starTreeDocumentOffsets = new ArrayList<>(); private int currentFileStartDocId; private int numReadableStarTreeDocuments; private int starTreeFileCount = -1; - private int currBytes = 0; private final int fileCountMergeThreshold; private int numStarTreeDocs = 0; @@ -98,7 +95,11 @@ IndexOutput createStarTreeDocumentsFileOutput() throws IOException { public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException { assert isAggregatedDoc == true; int numBytes = writeStarTreeDocument(starTreeDocument, starTreeDocsFileOutput, true); - addStarTreeDocumentOffset(numBytes); + if (docSizeInBytes == -1) { + docSizeInBytes = numBytes; + } else { + assert docSizeInBytes == numBytes; + } numStarTreeDocs++; } @@ -106,7 +107,14 @@ public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isA public StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException { assert isAggregatedDoc == true; ensureDocumentReadable(docId); - return readStarTreeDocument(starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId), true); + return readStarTreeDocument(starTreeDocsFileRandomInput, getOffset(docId), true); + } + + /** + * Returns offset for the docId based on the current file start id + */ + private long getOffset(int docId) { + return (long) (docId - currentFileStartDocId) * docSizeInBytes; } @Override @@ -119,19 +127,10 @@ public Long getDimensionValue(int docId, int dimensionId) throws IOException { public Long[] readDimensions(int docId) throws IOException { ensureDocumentReadable(docId); Long[] dims = new Long[starTreeField.getDimensionsOrder().size()]; - readDimensions(dims, starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId)); + readDimensions(dims, starTreeDocsFileRandomInput, getOffset(docId)); return dims; } - private void addStarTreeDocumentOffset(int bytes) { - starTreeDocumentOffsets.add(currBytes); - currBytes += bytes; - if (docSizeInBytes == -1) { - docSizeInBytes = bytes; - } - assert docSizeInBytes == bytes; - } - /** * Load the correct StarTreeDocuments file based on the docId */ @@ -199,7 +198,6 @@ private void loadStarTreeDocumentFile(int docId) throws IOException { * If the operation is only for reading existing documents, a new file is not created. */ private void closeAndMaybeCreateNewFile(boolean shouldCreateFileForAppend, int numStarTreeDocs) throws IOException { - currBytes = 0; if (starTreeDocsFileOutput != null) { fileToEndDocIdMap.put(starTreeDocsFileOutput.getName(), numStarTreeDocs); IOUtils.close(starTreeDocsFileOutput); @@ -232,7 +230,6 @@ private void mergeFiles(int numStarTreeDocs) throws IOException { deleteOldFiles(); fileToEndDocIdMap.clear(); fileToEndDocIdMap.put(mergedOutput.getName(), numStarTreeDocs); - resetStarTreeDocumentOffsets(); } } @@ -259,17 +256,6 @@ private void deleteOldFiles() throws IOException { } } - /** - * Reset the star tree document offsets based on the merged file - */ - private void resetStarTreeDocumentOffsets() { - int curr = 0; - for (int i = 0; i < starTreeDocumentOffsets.size(); i++) { - starTreeDocumentOffsets.set(i, curr); - curr += docSizeInBytes; - } - } - @Override public void close() { try { @@ -288,7 +274,6 @@ public void close() { tmpDirectory.deleteFile(file); } catch (IOException ignored) {} // similar to IOUtils.deleteFilesIgnoringExceptions } - starTreeDocumentOffsets.clear(); fileToEndDocIdMap.clear(); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java index a34bbbe9ee738..255ad343cde32 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java @@ -12,7 +12,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.IndexInput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.compositeindex.CompositeIndexMetadata; @@ -25,6 +24,8 @@ import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.StarTreeValuesIterator; import java.io.IOException; import java.util.ArrayList; @@ -59,14 +60,14 @@ public class StarTreeValues implements CompositeIndexValues { private final StarTreeNode root; /** - * A map containing suppliers for DocIdSetIterators for dimensions. + * A map containing suppliers for StarTreeValues iterators for dimensions. */ - private final Map> dimensionDocValuesIteratorMap; + private final Map> dimensionValuesIteratorMap; /** - * A map containing suppliers for DocIdSetIterators for metrics. + * A map containing suppliers for StarTreeValues iterators for metrics. */ - private final Map> metricDocValuesIteratorMap; + private final Map> metricValuesIteratorMap; /** * A map containing attributes associated with the star tree values. @@ -84,22 +85,22 @@ public class StarTreeValues implements CompositeIndexValues { * * @param starTreeField The StarTreeField object representing the star tree field configuration. * @param root The root node of the star tree. - * @param dimensionDocValuesIteratorMap A map containing suppliers for DocIdSetIterators for dimensions. - * @param metricDocValuesIteratorMap A map containing suppliers for DocIdSetIterators for metrics. + * @param dimensionValuesIteratorMap A map containing suppliers for StarTreeValues iterators for dimensions. + * @param metricValuesIteratorMap A map containing suppliers for StarTreeValues iterators for metrics. * @param attributes A map containing attributes associated with the star tree values. */ public StarTreeValues( StarTreeField starTreeField, StarTreeNode root, - Map> dimensionDocValuesIteratorMap, - Map> metricDocValuesIteratorMap, + Map> dimensionValuesIteratorMap, + Map> metricValuesIteratorMap, Map attributes, StarTreeMetadata compositeIndexMetadata ) { this.starTreeField = starTreeField; this.root = root; - this.dimensionDocValuesIteratorMap = dimensionDocValuesIteratorMap; - this.metricDocValuesIteratorMap = metricDocValuesIteratorMap; + this.dimensionValuesIteratorMap = dimensionValuesIteratorMap; + this.metricValuesIteratorMap = metricValuesIteratorMap; this.attributes = attributes; this.starTreeMetadata = compositeIndexMetadata; } @@ -146,12 +147,12 @@ public StarTreeValues( this.root = StarTreeFactory.createStarTree(compositeIndexDataIn, starTreeMetadata); // get doc id set iterators for metrics and dimensions - dimensionDocValuesIteratorMap = new LinkedHashMap<>(); - metricDocValuesIteratorMap = new LinkedHashMap<>(); + dimensionValuesIteratorMap = new LinkedHashMap<>(); + metricValuesIteratorMap = new LinkedHashMap<>(); // get doc id set iterators for dimensions for (String dimension : starTreeMetadata.getDimensionFields()) { - dimensionDocValuesIteratorMap.put(dimension, () -> { + dimensionValuesIteratorMap.put(dimension, () -> { try { SortedNumericDocValues dimensionSortedNumericDocValues = null; if (readState != null) { @@ -162,9 +163,9 @@ public StarTreeValues( dimensionSortedNumericDocValues = compositeDocValuesProducer.getSortedNumeric(dimensionfieldInfo); } } - return getSortedNumericDocValues(dimensionSortedNumericDocValues); + return new SortedNumericStarTreeValuesIterator(getSortedNumericDocValues(dimensionSortedNumericDocValues)); } catch (IOException e) { - throw new RuntimeException("Error loading dimension DocIdSetIterator", e); + throw new RuntimeException("Error loading dimension StarTreeValuesIterator", e); } }); } @@ -177,7 +178,7 @@ public StarTreeValues( metric.getField(), metricStat.getTypeName() ); - metricDocValuesIteratorMap.put(metricFullName, () -> { + metricValuesIteratorMap.put(metricFullName, () -> { try { SortedNumericDocValues metricSortedNumericDocValues = null; if (readState != null) { @@ -186,7 +187,7 @@ public StarTreeValues( metricSortedNumericDocValues = compositeDocValuesProducer.getSortedNumeric(metricFieldInfo); } } - return getSortedNumericDocValues(metricSortedNumericDocValues); + return new SortedNumericStarTreeValuesIterator(getSortedNumericDocValues(metricSortedNumericDocValues)); } catch (IOException e) { throw new RuntimeException("Error loading metric DocIdSetIterator", e); } @@ -239,30 +240,30 @@ public Map getAttributes() { } /** - * Returns the DocIdSetIterator for the specified dimension. + * Returns the StarTreeValues iterator for the specified dimension. * * @param dimension The name of the dimension. - * @return The DocIdSetIterator for the specified dimension. + * @return The StarTreeValuesIterator for the specified dimension. */ - public DocIdSetIterator getDimensionDocIdSetIterator(String dimension) { + public StarTreeValuesIterator getDimensionValuesIterator(String dimension) { - if (dimensionDocValuesIteratorMap.containsKey(dimension)) { - return dimensionDocValuesIteratorMap.get(dimension).get(); + if (dimensionValuesIteratorMap.containsKey(dimension)) { + return dimensionValuesIteratorMap.get(dimension).get(); } throw new IllegalArgumentException("dimension [" + dimension + "] does not exist in the segment."); } /** - * Returns the DocIdSetIterator for the specified fully qualified metric name. + * Returns the StarTreeValues iterator for the specified fully qualified metric name. * * @param fullyQualifiedMetricName The fully qualified name of the metric. - * @return The DocIdSetIterator for the specified fully qualified metric name. + * @return The StarTreeValuesIterator for the specified fully qualified metric name. */ - public DocIdSetIterator getMetricDocIdSetIterator(String fullyQualifiedMetricName) { + public StarTreeValuesIterator getMetricValuesIterator(String fullyQualifiedMetricName) { - if (metricDocValuesIteratorMap.containsKey(fullyQualifiedMetricName)) { - return metricDocValuesIteratorMap.get(fullyQualifiedMetricName).get(); + if (metricValuesIteratorMap.containsKey(fullyQualifiedMetricName)) { + return metricValuesIteratorMap.get(fullyQualifiedMetricName).get(); } throw new IllegalArgumentException("metric [" + fullyQualifiedMetricName + "] does not exist in the segment."); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java index 061841d3e140a..9029a451ca4d9 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java @@ -9,15 +9,19 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSetIterator; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.StarTreeValuesIterator; import java.io.IOException; /** - * Coordinates the reading of documents across multiple DocIdSetIterators. - * It encapsulates a single DocIdSetIterator and maintains the latest document ID and its associated value. + * Coordinates the reading of documents across multiple StarTreeValuesIterator. + * It encapsulates a single StarTreeValuesIterator and maintains the latest document ID and its associated value. + * + * In case of merge , this will be reading the entries of star tree values and in case of flush this will go through + * the actual segment documents. + * * @opensearch.experimental */ @ExperimentalApi @@ -26,76 +30,56 @@ public class SequentialDocValuesIterator { /** * The doc id set iterator associated for each field. */ - private final DocIdSetIterator docIdSetIterator; + private final StarTreeValuesIterator starTreeValuesIterator; /** - * The value associated with the latest document. + * The id of the latest record/entry. */ - private Long docValue; + private int entryId = -1; - /** - * The id of the latest document. - */ - private int docId = -1; - - /** - * Constructs a new SequentialDocValuesIterator instance with the given DocIdSetIterator. - * - * @param docIdSetIterator the DocIdSetIterator to be associated with this instance - */ - public SequentialDocValuesIterator(DocIdSetIterator docIdSetIterator) { - this.docIdSetIterator = docIdSetIterator; - } - - /** - * Returns the id of the latest document. - * - * @return the id of the latest document - */ - public int getDocId() { - return docId; + public SequentialDocValuesIterator(StarTreeValuesIterator starTreeValuesIterator) { + this.starTreeValuesIterator = starTreeValuesIterator; } /** - * Sets the id of the latest document. + * Returns the ID of the star tree record/entry or the segment document id * - * @param docId the ID of the latest document + * @return the ID of the star tree record/entry or the segment document id */ - private void setDocId(int docId) { - this.docId = docId; + public int getEntryId() { + return entryId; } /** - * Returns the DocIdSetIterator associated with this instance. + * Sets the id of the latest entry. * - * @return the DocIdSetIterator associated with this instance + * @param entryId the ID of the star tree record/entry or the segment document id */ - public DocIdSetIterator getDocIdSetIterator() { - return docIdSetIterator; + private void setEntryId(int entryId) { + this.entryId = entryId; } - public int nextDoc(int currentDocId) throws IOException { + public int nextEntry(int currentEntryId) throws IOException { // if doc id stored is less than or equal to the requested doc id , return the stored doc id - if (docId >= currentDocId) { - return docId; + if (entryId >= currentEntryId) { + return entryId; } - setDocId(this.docIdSetIterator.nextDoc()); - return docId; + setEntryId(this.starTreeValuesIterator.nextEntry()); + return entryId; } - public Long value(int currentDocId) throws IOException { - if (this.getDocIdSetIterator() instanceof SortedNumericDocValues) { - SortedNumericDocValues sortedNumericDocValues = (SortedNumericDocValues) this.getDocIdSetIterator(); - if (currentDocId < 0) { - throw new IllegalStateException("invalid doc id to fetch the next value"); + public Long value(int currentEntryId) throws IOException { + if (starTreeValuesIterator instanceof SortedNumericStarTreeValuesIterator) { + if (currentEntryId < 0) { + throw new IllegalStateException("invalid entry id to fetch the next value"); } - if (currentDocId == DocIdSetIterator.NO_MORE_DOCS) { - throw new IllegalStateException("DocValuesIterator is already exhausted"); + if (currentEntryId == StarTreeValuesIterator.NO_MORE_ENTRIES) { + throw new IllegalStateException("StarTreeValuesIterator is already exhausted"); } - if (docId == DocIdSetIterator.NO_MORE_DOCS || docId != currentDocId) { + if (entryId == StarTreeValuesIterator.NO_MORE_ENTRIES || entryId != currentEntryId) { return null; } - return sortedNumericDocValues.nextValue(); + return ((SortedNumericStarTreeValuesIterator) starTreeValuesIterator).nextValue(); } else { throw new IllegalStateException("Unsupported Iterator requested for SequentialDocValuesIterator"); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedNumericStarTreeValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedNumericStarTreeValuesIterator.java new file mode 100644 index 0000000000000..27afdf1479b4e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/SortedNumericStarTreeValuesIterator.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils.iterator; + +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Wrapper iterator class for StarTree index to traverse through SortedNumericDocValues + * + * @opensearch.experimental + */ +@ExperimentalApi +public class SortedNumericStarTreeValuesIterator extends StarTreeValuesIterator { + + public SortedNumericStarTreeValuesIterator(DocIdSetIterator docIdSetIterator) { + super(docIdSetIterator); + } + + public long nextValue() throws IOException { + return ((SortedNumericDocValues) docIdSetIterator).nextValue(); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/StarTreeValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/StarTreeValuesIterator.java new file mode 100644 index 0000000000000..32866f3e50092 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/StarTreeValuesIterator.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils.iterator; + +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Wrapper iterator class for StarTree index in place of DocIdSetIterator to read / traverse the docValues formats. + * This is needed since star tree values are different from segment documents and number of star tree values + * can even exceed segment docs in the worst cases. + * + * @opensearch.experimental + */ +@ExperimentalApi +public abstract class StarTreeValuesIterator { + + public static final int NO_MORE_ENTRIES = Integer.MAX_VALUE; + protected final DocIdSetIterator docIdSetIterator; + + public StarTreeValuesIterator(DocIdSetIterator docIdSetIterator) { + this.docIdSetIterator = docIdSetIterator; + } + + public int entryId() { + return docIdSetIterator.docID(); + } + + public int nextEntry() throws IOException { + return docIdSetIterator.nextDoc(); + } + + public int advance(int target) throws IOException { + return docIdSetIterator.advance(target); + } + + public long cost() { + return docIdSetIterator.cost(); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/package-info.java new file mode 100644 index 0000000000000..3c6444a4a5cac --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/iterator/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * This contains classes for StarTreeValues iterators + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.utils.iterator; diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java index b7395b993f67b..7cae1cd25ee93 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java @@ -54,7 +54,7 @@ public static StarTreeDocument[] getSegmentsStarTreeDocuments( for (int i = 0; i < dimensionsSplitOrder.size(); i++) { String dimension = dimensionsSplitOrder.get(i).getField(); - dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionValuesIterator(dimension)); } List metricReaders = new ArrayList<>(); @@ -69,7 +69,7 @@ public static StarTreeDocument[] getSegmentsStarTreeDocuments( metric.getField(), metricStat.getTypeName() ); - metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricValuesIterator(metricFullName))); } } @@ -92,7 +92,7 @@ public static StarTreeDocument getStarTreeDocument( Long[] dims = new Long[dimensionReaders.length]; int i = 0; for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { - dimensionDocValueIterator.nextDoc(currentDocId); + dimensionDocValueIterator.nextEntry(currentDocId); Long val = dimensionDocValueIterator.value(currentDocId); dims[i] = val; i++; @@ -100,7 +100,7 @@ public static StarTreeDocument getStarTreeDocument( i = 0; Object[] metrics = new Object[metricReaders.size()]; for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { - metricDocValuesIterator.nextDoc(currentDocId); + metricDocValuesIterator.nextEntry(currentDocId); metrics[i] = toAggregatorValueType(metricDocValuesIterator.value(currentDocId), fieldValueConverters.get(i)); i++; } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 65adc43ea8bea..b77200f173e71 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -53,6 +53,8 @@ import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.StarTreeValuesIterator; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.FieldValueConverter; @@ -395,7 +397,9 @@ SequentialDocValuesIterator[] getDimensionIterators(StarTreeDocument[] starTreeD docsWithField.add(i); } } - sequentialDocValuesIterators[j] = new SequentialDocValuesIterator(getSortedNumericMock(dimList, docsWithField)); + sequentialDocValuesIterators[j] = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList, docsWithField)) + ); } return sequentialDocValuesIterators; } @@ -412,7 +416,9 @@ List getMetricIterators(StarTreeDocument[] starTree docsWithField.add(i); } } - sequentialDocValuesIterators.add(new SequentialDocValuesIterator(getSortedNumericMock(metricslist, docsWithField))); + sequentialDocValuesIterators.add( + new SequentialDocValuesIterator(new SortedNumericStarTreeValuesIterator(getSortedNumericMock(metricslist, docsWithField))) + ); } return sequentialDocValuesIterators; } @@ -1985,10 +1991,16 @@ public void testFlushFlow() throws IOException { List metricsWithField = List.of(0, 1, 2, 3, 4, 5); compositeField = getStarTreeFieldWithMultipleMetrics(); - SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericStarTreeValuesIterator d1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList, docsWithField)); + SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(dimList2, docsWithField2) + ); + SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); + SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); writeState = getWriteState(6, writeState.segmentInfo.getId()); builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); @@ -2081,10 +2093,16 @@ public void testFlushFlowDimsReverse() throws IOException { List metricsWithField = List.of(0, 1, 2, 3, 4, 5); compositeField = getStarTreeFieldWithMultipleMetrics(); - SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericStarTreeValuesIterator d1sndv = new SortedNumericStarTreeValuesIterator(getSortedNumericMock(dimList, docsWithField)); + SortedNumericStarTreeValuesIterator d2sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(dimList2, docsWithField2) + ); + SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); + SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); writeState = getWriteState(6, writeState.segmentInfo.getId()); this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( @@ -2508,9 +2526,14 @@ private StarTreeValues getStarTreeValues( SortedNumericDocValues d1sndv = dimList; SortedNumericDocValues d2sndv = dimList2; SortedNumericDocValues m1sndv = metricsList; - Map> dimDocIdSetIterators = Map.of("field1", () -> d1sndv, "field3", () -> d2sndv); + Map> dimDocIdSetIterators = Map.of( + "field1", + () -> new SortedNumericStarTreeValuesIterator(d1sndv), + "field3", + () -> new SortedNumericStarTreeValuesIterator(d2sndv) + ); - Map> metricDocIdSetIterators = new LinkedHashMap<>(); + Map> metricDocIdSetIterators = new LinkedHashMap<>(); for (Metric metric : sf.getMetrics()) { for (MetricStat metricStat : metric.getMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( @@ -2518,7 +2541,7 @@ private StarTreeValues getStarTreeValues( metric.getField(), metricStat.getTypeName() ); - metricDocIdSetIterators.put(metricFullName, () -> m1sndv); + metricDocIdSetIterators.put(metricFullName, () -> new SortedNumericStarTreeValuesIterator(m1sndv)); } } @@ -3648,18 +3671,18 @@ private StarTreeValues getStarTreeValues( SortedNumericDocValues d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList1, metricsWithField1); - Map> dimDocIdSetIterators = Map.of( + Map> dimDocIdSetIterators = Map.of( "field1", - () -> d1sndv, + () -> new SortedNumericStarTreeValuesIterator(d1sndv), "field3", - () -> d2sndv, + () -> new SortedNumericStarTreeValuesIterator(d2sndv), "field5", - () -> d3sndv, + () -> new SortedNumericStarTreeValuesIterator(d3sndv), "field8", - () -> d4sndv + () -> new SortedNumericStarTreeValuesIterator(d4sndv) ); - Map> metricDocIdSetIterators = new LinkedHashMap<>(); + Map> metricDocIdSetIterators = new LinkedHashMap<>(); metricDocIdSetIterators.put( fullyQualifiedFieldNameForStarTreeMetricsDocValues( @@ -3667,7 +3690,7 @@ private StarTreeValues getStarTreeValues( "field2", sf.getMetrics().get(0).getMetrics().get(0).getTypeName() ), - () -> m1sndv + () -> new SortedNumericStarTreeValuesIterator(m1sndv) ); metricDocIdSetIterators.put( fullyQualifiedFieldNameForStarTreeMetricsDocValues( @@ -3675,7 +3698,7 @@ private StarTreeValues getStarTreeValues( "_doc_count", sf.getMetrics().get(1).getMetrics().get(0).getTypeName() ), - () -> m2sndv + () -> new SortedNumericStarTreeValuesIterator(m2sndv) ); // metricDocIdSetIterators.put("field2", () -> m1sndv); // metricDocIdSetIterators.put("_doc_count", () -> m2sndv); @@ -4093,24 +4116,24 @@ public void testMergeFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues m2sndv = DocValues.emptySortedNumeric(); - Map> dimDocIdSetIterators = Map.of( + Map> dimDocIdSetIterators = Map.of( "field1", - () -> d1sndv, + () -> new SortedNumericStarTreeValuesIterator(d1sndv), "field3", - () -> d2sndv, + () -> new SortedNumericStarTreeValuesIterator(d2sndv), "field5", - () -> d3sndv, + () -> new SortedNumericStarTreeValuesIterator(d3sndv), "field8", - () -> d4sndv + () -> new SortedNumericStarTreeValuesIterator(d4sndv) ); - Map> metricDocIdSetIterators = Map.of( + Map> metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", - () -> m1sndv, + () -> new SortedNumericStarTreeValuesIterator(m1sndv), "sf_field2_value_count_metric", - () -> valucountsndv, + () -> new SortedNumericStarTreeValuesIterator(valucountsndv), "sf__doc_count_doc_count_metric", - () -> m2sndv + () -> new SortedNumericStarTreeValuesIterator(m2sndv) ); StarTreeValues starTreeValues = new StarTreeValues( @@ -4129,24 +4152,24 @@ public void testMergeFlow() throws IOException { SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues f2valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues f2m2sndv = DocValues.emptySortedNumeric(); - Map> f2dimDocIdSetIterators = Map.of( + Map> f2dimDocIdSetIterators = Map.of( "field1", - () -> f2d1sndv, + () -> new SortedNumericStarTreeValuesIterator(f2d1sndv), "field3", - () -> f2d2sndv, + () -> new SortedNumericStarTreeValuesIterator(f2d2sndv), "field5", - () -> f2d3sndv, + () -> new SortedNumericStarTreeValuesIterator(f2d3sndv), "field8", - () -> f2d4sndv + () -> new SortedNumericStarTreeValuesIterator(f2d4sndv) ); - Map> f2metricDocIdSetIterators = Map.of( + Map> f2metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", - () -> f2m1sndv, + () -> new SortedNumericStarTreeValuesIterator(f2m1sndv), "sf_field2_value_count_metric", - () -> f2valucountsndv, + () -> new SortedNumericStarTreeValuesIterator(f2valucountsndv), "sf__doc_count_doc_count_metric", - () -> f2m2sndv + () -> new SortedNumericStarTreeValuesIterator(f2m2sndv) ); StarTreeValues starTreeValues2 = new StarTreeValues( compositeField, diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java index f56f7d9906ae1..78d63800abd16 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java @@ -9,15 +9,13 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; import org.opensearch.test.OpenSearchTestCase; import org.junit.BeforeClass; @@ -59,50 +57,32 @@ public void testCreateIterator_SortedNumeric() throws IOException { DocValuesProducer producer = Mockito.mock(DocValuesProducer.class); SortedNumericDocValues iterator = Mockito.mock(SortedNumericDocValues.class); when(producer.getSortedNumeric(mockFieldInfo)).thenReturn(iterator); - SequentialDocValuesIterator result = new SequentialDocValuesIterator(producer.getSortedNumeric(mockFieldInfo)); - assertEquals(iterator.getClass(), result.getDocIdSetIterator().getClass()); - } - - public void testCreateIterator_UnsupportedType() throws IOException { - DocValuesProducer producer = Mockito.mock(DocValuesProducer.class); - BinaryDocValues iterator = Mockito.mock(BinaryDocValues.class); - when(producer.getBinary(mockFieldInfo)).thenReturn(iterator); - SequentialDocValuesIterator result = new SequentialDocValuesIterator(producer.getBinary(mockFieldInfo)); - assertEquals(iterator.getClass(), result.getDocIdSetIterator().getClass()); - when(iterator.nextDoc()).thenReturn(0); - when(iterator.binaryValue()).thenReturn(new BytesRef("123")); + SequentialDocValuesIterator result = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(producer.getSortedNumeric(mockFieldInfo)) + ); - IllegalStateException exception = expectThrows(IllegalStateException.class, () -> { - result.nextDoc(0); - result.value(0); - }); - assertEquals("Unsupported Iterator requested for SequentialDocValuesIterator", exception.getMessage()); } public void testGetNextValue_SortedNumeric() throws IOException { SortedNumericDocValues iterator = Mockito.mock(SortedNumericDocValues.class); when(iterator.nextDoc()).thenReturn(0); when(iterator.nextValue()).thenReturn(123L); - SequentialDocValuesIterator sequentialDocValuesIterator = new SequentialDocValuesIterator(iterator); - sequentialDocValuesIterator.nextDoc(0); + SequentialDocValuesIterator sequentialDocValuesIterator = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(iterator) + ); + sequentialDocValuesIterator.nextEntry(0); long result = sequentialDocValuesIterator.value(0); assertEquals(123L, result); } - public void testGetNextValue_UnsupportedIterator() { - DocIdSetIterator iterator = Mockito.mock(DocIdSetIterator.class); - SequentialDocValuesIterator sequentialDocValuesIterator = new SequentialDocValuesIterator(iterator); - - IllegalStateException exception = expectThrows(IllegalStateException.class, () -> { sequentialDocValuesIterator.value(0); }); - assertEquals("Unsupported Iterator requested for SequentialDocValuesIterator", exception.getMessage()); - } - - public void testNextDoc() throws IOException { + public void testNextEntry() throws IOException { SortedNumericDocValues iterator = Mockito.mock(SortedNumericDocValues.class); - SequentialDocValuesIterator sequentialDocValuesIterator = new SequentialDocValuesIterator(iterator); + SequentialDocValuesIterator sequentialDocValuesIterator = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(iterator) + ); when(iterator.nextDoc()).thenReturn(5); - int result = sequentialDocValuesIterator.nextDoc(5); + int result = sequentialDocValuesIterator.nextEntry(5); assertEquals(5, result); } @@ -110,8 +90,12 @@ public void test_multipleCoordinatedDocumentReader() throws IOException { SortedNumericDocValues iterator1 = Mockito.mock(SortedNumericDocValues.class); SortedNumericDocValues iterator2 = Mockito.mock(SortedNumericDocValues.class); - SequentialDocValuesIterator sequentialDocValuesIterator1 = new SequentialDocValuesIterator(iterator1); - SequentialDocValuesIterator sequentialDocValuesIterator2 = new SequentialDocValuesIterator(iterator2); + SequentialDocValuesIterator sequentialDocValuesIterator1 = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(iterator1) + ); + SequentialDocValuesIterator sequentialDocValuesIterator2 = new SequentialDocValuesIterator( + new SortedNumericStarTreeValuesIterator(iterator2) + ); when(iterator1.nextDoc()).thenReturn(0); when(iterator2.nextDoc()).thenReturn(1); @@ -119,13 +103,13 @@ public void test_multipleCoordinatedDocumentReader() throws IOException { when(iterator1.nextValue()).thenReturn(9L); when(iterator2.nextValue()).thenReturn(9L); - sequentialDocValuesIterator1.nextDoc(0); - sequentialDocValuesIterator2.nextDoc(0); - assertEquals(0, sequentialDocValuesIterator1.getDocId()); + sequentialDocValuesIterator1.nextEntry(0); + sequentialDocValuesIterator2.nextEntry(0); + assertEquals(0, sequentialDocValuesIterator1.getEntryId()); assertEquals(9L, (long) sequentialDocValuesIterator1.value(0)); assertNull(sequentialDocValuesIterator2.value(0)); - assertNotEquals(0, sequentialDocValuesIterator2.getDocId()); - assertEquals(1, sequentialDocValuesIterator2.getDocId()); + assertNotEquals(0, sequentialDocValuesIterator2.getEntryId()); + assertEquals(1, sequentialDocValuesIterator2.getEntryId()); assertEquals(9L, (long) sequentialDocValuesIterator2.value(1)); } }