Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build global ordinals terms bucket from matching ordinals #30166

Merged
merged 6 commits into from
Apr 27, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
Expand Down Expand Up @@ -103,11 +101,22 @@ public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws

BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantStringTerms.Bucket spare = null;
for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
boolean needsFullSan = bucketOrds == null || bucketCountThresholds.getMinDocCount() == 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: "needsFullSan" should be "needsFullScan".

long maxId = needsFullSan ? valueCount : bucketOrds.size();
for (long ord = 0; ord < maxId; ord++) {
final long globalOrd;
final long bucketOrd;
if (needsFullSan) {
bucketOrd = bucketOrds == null ? ord : bucketOrds.find(ord);
globalOrd = ord;
} else {
assert bucketOrds != null;
bucketOrd = ord;
globalOrd = bucketOrds.get(ord);
}
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalOrd)) {
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
Expand All @@ -120,7 +129,7 @@ public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
}
spare.bucketOrd = bucketOrd;
copy(lookupGlobalOrd.apply(globalTermOrd), spare.termBytes);
copy(lookupGlobalOrd.apply(globalOrd), spare.termBytes);
spare.subsetDf = bucketDocCount;
spare.subsetSize = subsetSize;
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
protected final long valueCount;
protected final GlobalOrdLookupFunction lookupGlobalOrd;

private final LongHash bucketOrds;
protected final LongHash bucketOrds;

public interface GlobalOrdLookupFunction {
BytesRef apply(long ord) throws IOException;
Expand Down Expand Up @@ -107,10 +107,6 @@ boolean remapGlobalOrds() {
return bucketOrds != null;
}

/**
 * Translates a global ordinal into the bucket ordinal under which its doc count is tracked.
 *
 * When no {@code bucketOrds} hash is in use, the global ordinal itself serves as the
 * bucket ordinal; otherwise the value is looked up via {@code bucketOrds.find(globalOrd)}.
 *
 * @param globalOrd the global ordinal to translate
 * @return {@code globalOrd} unchanged when {@code bucketOrds} is {@code null}, else the
 *         result of {@code bucketOrds.find(globalOrd)}; callers treat a negative result
 *         as "no documents collected" (presumably the hash's not-found marker - confirm)
 */
protected final long getBucketOrd(long globalOrd) {
    if (bucketOrds == null) {
        // No remapping: global ordinals are used directly as bucket ordinals.
        return globalOrd;
    }
    return bucketOrds.find(globalOrd);
}

private void collectGlobalOrd(int doc, long globalOrd, LeafBucketCollector sub) throws IOException {
if (bucketOrds == null) {
collectExistingBucket(sub, doc, globalOrd);
Expand Down Expand Up @@ -188,17 +184,28 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE
long otherDocCount = 0;
BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
boolean needsFullScan = bucketOrds == null || bucketCountThresholds.getMinDocCount() == 0;
long maxId = needsFullScan ? valueCount : bucketOrds.size();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make `needsFullScan` and `maxId` final?

for (long ord = 0; ord < maxId; ord++) {
final long globalOrd;
final long bucketOrd;
if (needsFullScan) {
bucketOrd = bucketOrds == null ? ord : bucketOrds.find(ord);
globalOrd = ord;
} else {
assert bucketOrds != null;
bucketOrd = ord;
globalOrd = bucketOrds.get(ord);
}
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalOrd)) {
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}
otherDocCount += bucketDocCount;
spare.globalOrd = globalTermOrd;
spare.globalOrd = globalOrd;
spare.bucketOrd = bucketOrd;
spare.docCount = bucketDocCount;
if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
Expand Down Expand Up @@ -378,7 +385,7 @@ private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IO
}
final long ord = i - 1; // remember we do +1 when counting
final long globalOrd = mapping.applyAsLong(ord);
long bucketOrd = getBucketOrd(globalOrd);
long bucketOrd = bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd);
incrementBucketDocCount(bucketOrd, inc);
}
}
Expand Down