Skip to content

Commit

Permalink
Reduce memory pressure when sending large terms queries. (#21776)
Browse files Browse the repository at this point in the history
When users send large `terms` query to Elasticsearch, every value is stored in
an object. This change does not reduce the amount of created objects, but makes
sure these objects die young by optimizing the list storage in case all values
are either non-null instances of Long objects or BytesRef objects, which seems
to help the JVM significantly.
  • Loading branch information
jpountz committed Nov 30, 2016
1 parent 7290b2d commit 56faeb3
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public void writeByte(byte b) throws IOException {
}

@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
public void writeBytes(byte[] b, int offset, int length) {
// nothing to copy
if (length == 0) {
return;
Expand Down
139 changes: 114 additions & 25 deletions core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
Expand All @@ -43,11 +46,15 @@
import org.elasticsearch.indices.TermsLookup;

import java.io.IOException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -80,7 +87,7 @@ public TermsQueryBuilder(String fieldName, TermsLookup termsLookup) {
throw new IllegalArgumentException("Both values and termsLookup specified for terms query");
}
this.fieldName = fieldName;
this.values = values;
this.values = values == null ? null : convert(values);
this.termsLookup = termsLookup;
}

Expand Down Expand Up @@ -159,7 +166,7 @@ public TermsQueryBuilder(String fieldName, Iterable<?> values) {
throw new IllegalArgumentException("No value specified for terms query");
}
this.fieldName = fieldName;
this.values = convertToBytesRefListIfStringList(values);
this.values = convert(values);
this.termsLookup = null;
}

Expand All @@ -185,43 +192,125 @@ public String fieldName() {
}

public List<Object> values() {
return convertToStringListIfBytesRefList(this.values);
return convertBack(this.values);
}

public TermsLookup termsLookup() {
return this.termsLookup;
}

private static final Set<Class<? extends Number>> INTEGER_TYPES = new HashSet<>(
Arrays.asList(Byte.class, Short.class, Integer.class, Long.class));
private static final Set<Class<?>> STRING_TYPES = new HashSet<>(
Arrays.asList(BytesRef.class, String.class));

/**
* Same as {@link #convertToBytesRefIfString} but on Iterable.
* @param objs the Iterable of input object
* @return the same input or a list of {@link BytesRef} representation if input was a list of type string
* Same as {@link #convert(List)} but on an {@link Iterable}.
*/
private static List<Object> convertToBytesRefListIfStringList(Iterable<?> objs) {
if (objs == null) {
return null;
}
List<Object> newObjs = new ArrayList<>();
for (Object obj : objs) {
newObjs.add(convertToBytesRefIfString(obj));
private static List<?> convert(Iterable<?> values) {
List<?> list;
if (values instanceof List<?>) {
list = (List<?>) values;
} else {
ArrayList<Object> arrayList = new ArrayList<Object>();
for (Object o : values) {
arrayList.add(o);
}
list = arrayList;
}
return newObjs;
return convert(list);
}

/**
* Same as {@link #convertToStringIfBytesRef} but on Iterable.
* @param objs the Iterable of input object
* @return the same input or a list of utf8 string if input was a list of type {@link BytesRef}
* Convert the list in a way that optimizes storage in the case that all
* elements are either integers or {@link String}s/{@link BytesRef}s. This
* is useful to help garbage collections for use-cases that involve sending
* very large terms queries to Elasticsearch. If the list does not only
* contain integers or {@link String}s, then a list is returned where all
* {@link String}s have been replaced with {@link BytesRef}s.
*/
private static List<Object> convertToStringListIfBytesRefList(Iterable<?> objs) {
if (objs == null) {
return null;
static List<?> convert(List<?> list) {
if (list.isEmpty()) {
return Collections.emptyList();
}
List<Object> newObjs = new ArrayList<>();
for (Object obj : objs) {
newObjs.add(convertToStringIfBytesRef(obj));

final boolean allNumbers = list.stream().allMatch(o -> o != null && INTEGER_TYPES.contains(o.getClass()));
if (allNumbers) {
final long[] elements = list.stream().mapToLong(o -> ((Number) o).longValue()).toArray();
return new AbstractList<Object>() {
@Override
public Object get(int index) {
return elements[index];
}
@Override
public int size() {
return elements.length;
}
};
}
return newObjs;

final boolean allStrings = list.stream().allMatch(o -> o != null && STRING_TYPES.contains(o.getClass()));
if (allStrings) {
final BytesRefBuilder builder = new BytesRefBuilder();
try (final BytesStreamOutput bytesOut = new BytesStreamOutput()) {
final int[] endOffsets = new int[list.size()];
int i = 0;
for (Object o : list) {
BytesRef b;
if (o instanceof BytesRef) {
b = (BytesRef) o;
} else {
builder.copyChars(o.toString());
b = builder.get();
}
bytesOut.writeBytes(b.bytes, b.offset, b.length);
if (i == 0) {
endOffsets[0] = b.length;
} else {
endOffsets[i] = Math.addExact(endOffsets[i-1], b.length);
}
++i;
}
final BytesReference bytes = bytesOut.bytes();
return new AbstractList<Object>() {
public Object get(int i) {
final int startOffset = i == 0 ? 0 : endOffsets[i-1];
final int endOffset = endOffsets[i];
return bytes.slice(startOffset, endOffset - startOffset).toBytesRef();
}
public int size() {
return endOffsets.length;
}
};
}
}

return list.stream().map(o -> o instanceof String ? new BytesRef(o.toString()) : o).collect(Collectors.toList());
}

/**
* Convert the internal {@link List} of values back to a user-friendly list.
* Integers are kept as-is since the terms query does not make any difference
* between {@link Integer}s and {@link Long}s, but {@link BytesRef}s are
* converted back to {@link String}s.
*/
static List<Object> convertBack(List<?> list) {
return new AbstractList<Object>() {
@Override
public int size() {
return list.size();
}
@Override
public Object get(int index) {
Object o = list.get(index);
if (o instanceof BytesRef) {
o = ((BytesRef) o).utf8ToString();
}
// we do not convert longs, all integer types are equivalent
// as far as this query is concerned
return o;
}
};
}

@Override
Expand All @@ -232,7 +321,7 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
termsLookup.toXContent(builder, params);
builder.endObject();
} else {
builder.field(fieldName, convertToStringListIfBytesRefList(values));
builder.field(fieldName, convertBack(values));
}
printBoostAndQueryName(builder);
builder.endObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.get.GetRequest;
Expand All @@ -43,6 +44,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -214,7 +216,7 @@ public void testNumeric() throws IOException {
TermsQueryBuilder builder = new TermsQueryBuilder("foo", new int[]{1, 3, 4});
TermsQueryBuilder copy = (TermsQueryBuilder) assertSerialization(builder);
List<Object> values = copy.values();
assertEquals(Arrays.asList(1, 3, 4), values);
assertEquals(Arrays.asList(1L, 3L, 4L), values);
}
{
TermsQueryBuilder builder = new TermsQueryBuilder("foo", new double[]{1, 3, 4});
Expand Down Expand Up @@ -297,5 +299,27 @@ protected boolean isCachable(TermsQueryBuilder queryBuilder) {
// that's why we return true here all the time
return super.isCachable(queryBuilder);
}

public void testConversion() {
List<Object> list = Arrays.asList();
assertSame(Collections.emptyList(), TermsQueryBuilder.convert(list));
assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));

list = Arrays.asList("abc");
assertEquals(Arrays.asList(new BytesRef("abc")), TermsQueryBuilder.convert(list));
assertEquals(list, TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));

list = Arrays.asList("abc", new BytesRef("def"));
assertEquals(Arrays.asList(new BytesRef("abc"), new BytesRef("def")), TermsQueryBuilder.convert(list));
assertEquals(Arrays.asList("abc", "def"), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));

list = Arrays.asList(5, 42L);
assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convert(list));
assertEquals(Arrays.asList(5L, 42L), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));

list = Arrays.asList(5, 42d);
assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convert(list));
assertEquals(Arrays.asList(5, 42d), TermsQueryBuilder.convertBack(TermsQueryBuilder.convert(list)));
}
}

0 comments on commit 56faeb3

Please sign in to comment.