Skip to content

Commit

Permalink
Refactored GeoHashGrid unit tests (#37832)
Browse files Browse the repository at this point in the history
* Refactored GeoHashGrid unit tests

This change allows other grid aggregations to reuse the same tests.

The change mostly just moves code to the base classes, trying to
keep changes to a bare minimum.

* rename createInternalGeoHashGridBucket to createInternalGeoGridBucket

* indentation
  • Loading branch information
nyurik authored Jan 25, 2019
1 parent afd4618 commit f1e71be
Show file tree
Hide file tree
Showing 4 changed files with 328 additions and 203 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;

/**
 * Shared aggregator-level tests for geo-grid aggregations.
 * <p>
 * Concrete subclasses supply the precision rules, the point-to-bucket-key conversion and the
 * aggregation builder; the test methods defined here then index random points into a throw-away
 * Lucene index and verify the buckets produced by the aggregator.
 *
 * @param <T> bucket type of the {@link InternalGeoGrid} produced by the aggregation under test
 */
public abstract class GeoGridAggregatorTestCase<T extends InternalGeoGridBucket> extends AggregatorTestCase {

    /** Name of the doc-values field the test documents are indexed under. */
    private static final String FIELD_NAME = "location";

    /**
     * Generate a random precision according to the rules of the given aggregation.
     */
    protected abstract int randomPrecision();

    /**
     * Convert geo point into a hash string (bucket string ID).
     * <p>
     * Note the argument order: longitude first, then latitude (the opposite of
     * {@link LatLonDocValuesField}'s constructor as used in this class).
     */
    protected abstract String hashAsString(double lng, double lat, int precision);

    /**
     * Create a new named {@link GeoGridAggregationBuilder}-derived builder
     */
    protected abstract GeoGridAggregationBuilder createBuilder(String name);

    /**
     * An empty index must yield an aggregation without any buckets.
     */
    public void testNoDocs() throws IOException {
        testCase(new MatchAllDocsQuery(), FIELD_NAME, randomPrecision(), iw -> {
            // Intentionally not writing any docs
        }, geoGrid -> {
            assertEquals(0, geoGrid.getBuckets().size());
        });
    }

    /**
     * Aggregating on a field name that no document carries must yield no buckets,
     * even though the index contains a point under {@link #FIELD_NAME}.
     */
    public void testFieldMissing() throws IOException {
        testCase(new MatchAllDocsQuery(), "wrong_field", randomPrecision(), iw -> {
            iw.addDocument(Collections.singleton(new LatLonDocValuesField(FIELD_NAME, 10D, 10D)));
        }, geoGrid -> {
            assertEquals(0, geoGrid.getBuckets().size());
        });
    }

    /**
     * Indexes a random number of points, spread over single- and multi-valued documents, and checks
     * that every bucket's doc count equals the number of documents having at least one point in it.
     */
    public void testWithSeveralDocs() throws IOException {
        int precision = randomPrecision();
        int numPoints = randomIntBetween(8, 128);
        Map<String, Integer> expectedCountPerGeoHash = new HashMap<>();
        testCase(new MatchAllDocsQuery(), FIELD_NAME, precision, iw -> {
            List<LatLonDocValuesField> points = new ArrayList<>();
            // Hashes already counted for the document currently being assembled: a document
            // contributes at most one to a bucket, however many of its points fall into it.
            Set<String> distinctHashesPerDoc = new HashSet<>();
            for (int pointId = 0; pointId < numPoints; pointId++) {
                double lat = (180d * randomDouble()) - 90d;
                double lng = (360d * randomDouble()) - 180d;

                points.add(new LatLonDocValuesField(FIELD_NAME, lat, lng));
                // NOTE(review): the expected key is derived from the raw doubles, while the field may
                // store an encoded form of them; if that encoding is lossy, a point sitting right on a
                // cell edge could be bucketed differently than expected here - confirm for new grid types.
                String hash = hashAsString(lng, lat, precision);
                if (distinctHashesPerDoc.add(hash)) {
                    expectedCountPerGeoHash.merge(hash, 1, Integer::sum);
                }
                if (usually()) {
                    // Flush the accumulated points as one (possibly multi-valued) document.
                    iw.addDocument(points);
                    points.clear();
                    distinctHashesPerDoc.clear();
                }
            }
            if (points.isEmpty() == false) {
                iw.addDocument(points);
            }
        }, geoHashGrid -> {
            assertEquals(expectedCountPerGeoHash.size(), geoHashGrid.getBuckets().size());
            for (GeoGrid.Bucket bucket : geoHashGrid.getBuckets()) {
                String key = bucket.getKeyAsString();
                // Fail with a readable message instead of a NullPointerException on unboxing.
                assertTrue("unexpected bucket [" + key + "]", expectedCountPerGeoHash.containsKey(key));
                assertEquals((long) expectedCountPerGeoHash.get(key), bucket.getDocCount());
            }
            assertTrue(AggregationInspectionHelper.hasValue(geoHashGrid));
        });
    }

    /**
     * Builds a temporary index, runs the aggregation created by {@link #createBuilder(String)} over it
     * and hands the resulting grid to {@code verify}.
     * <p>
     * The writer, reader and directory are closed even when indexing, searching or verification
     * throws, so a failing assertion does not additionally leave index resources open.
     *
     * @param query      query selecting the documents to aggregate
     * @param field      field name the aggregation is configured with; the mapped field type is always
     *                   named {@link #FIELD_NAME}, so any other value targets a field without data
     * @param precision  grid precision passed to the builder
     * @param buildIndex callback that writes the test documents
     * @param verify     callback asserting on the aggregation result
     * @throws IOException if building or reading the index fails
     */
    private void testCase(Query query, String field, int precision, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
                          Consumer<InternalGeoGrid<T>> verify) throws IOException {
        try (Directory directory = newDirectory()) {
            // The writer is closed before the reader is opened, exactly as a sequential
            // write-then-read would do.
            try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                buildIndex.accept(indexWriter);
            }

            try (IndexReader indexReader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

                GeoGridAggregationBuilder aggregationBuilder = createBuilder("_name").field(field);
                aggregationBuilder.precision(precision);
                MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
                fieldType.setHasDocValues(true);
                fieldType.setName(FIELD_NAME);

                Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
                aggregator.preCollection();
                indexSearcher.search(query, aggregator);
                aggregator.postCollection();

                // The builder comes from the subclass, which also fixes T, so the grid is expected
                // to carry buckets of type T; the compiler cannot prove that.
                @SuppressWarnings("unchecked")
                InternalGeoGrid<T> result = (InternalGeoGrid<T>) aggregator.buildAggregation(0L);
                verify.accept(result);
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;

/**
 * Shared serialization/reduce tests for {@link InternalGeoGrid} implementations.
 * <p>
 * Subclasses provide factories for the concrete grid and bucket classes, the point-to-hash encoding
 * and the precision rules; everything else (random instance creation, mutation, reduce verification)
 * lives here.
 *
 * @param <B> concrete bucket type
 * @param <T> concrete grid type holding buckets of type {@code B}
 */
public abstract class GeoGridTestCase<B extends InternalGeoGridBucket, T extends InternalGeoGrid<B>>
        extends InternalMultiBucketAggregationTestCase<T> {

    /**
     * Instantiate a {@link InternalGeoGrid}-derived class using the same parameters as constructor.
     */
    protected abstract T createInternalGeoGrid(String name, int size, List<InternalGeoGridBucket> buckets,
                                               List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData);

    /**
     * Instantiate a {@link InternalGeoGridBucket}-derived class using the same parameters as constructor.
     */
    protected abstract B createInternalGeoGridBucket(Long key, long docCount, InternalAggregations aggregations);

    /**
     * Encode longitude and latitude with a given precision as a long hash.
     */
    protected abstract long longEncode(double lng, double lat, int precision);

    /**
     * Generate a random precision according to the rules of the given aggregation.
     */
    protected abstract int randomPrecision();

    /** Lower bound for the number of buckets in a generated instance. */
    @Override
    protected int minNumberOfBuckets() {
        return 1;
    }

    /** Upper bound for the number of buckets in a generated instance. */
    @Override
    protected int maxNumberOfBuckets() {
        return 3;
    }

    /**
     * Builds a grid whose buckets sit on random points, all encoded with one random precision.
     * The grid's size is set to the number of generated buckets.
     */
    @Override
    protected T createTestInstance(String name,
                                   List<PipelineAggregator> pipelineAggregators,
                                   Map<String, Object> metaData,
                                   InternalAggregations aggregations) {
        final int precision = randomPrecision();
        final int bucketCount = randomNumberOfBuckets();
        final List<InternalGeoGridBucket> generated = new ArrayList<>(bucketCount);
        for (int remaining = bucketCount; remaining > 0; remaining--) {
            final double lat = randomDoubleBetween(-90.0, 90.0, false);
            final double lng = randomDoubleBetween(-180.0, 180.0, false);

            final long cellHash = longEncode(lng, lat, precision);
            final int docs = randomInt(IndexWriter.MAX_DOCS);
            generated.add(createInternalGeoGridBucket(cellHash, docs, aggregations));
        }
        return createInternalGeoGrid(name, bucketCount, generated, pipelineAggregators, metaData);
    }

    /**
     * Recomputes the expected reduce result from {@code inputs}: buckets sharing a hash are merged by
     * summing their doc counts, the merged buckets are ordered by descending doc count (ties broken by
     * reverse bucket order) and cut to the required size of the first input. The doc count and key of
     * each expected bucket are then compared with {@code reduced} position by position.
     */
    @Override
    protected void assertReduced(T reduced, List<T> inputs) {
        // Step 1: group all input buckets by their cell hash.
        final Map<Long, List<B>> bucketsByHash = new HashMap<>();
        for (T input : inputs) {
            for (GeoGrid.Bucket rawBucket : input.getBuckets()) {
                @SuppressWarnings("unchecked")
                B typed = (B) rawBucket;
                bucketsByHash.computeIfAbsent(typed.hashAsLong, hash -> new ArrayList<>()).add(typed);
            }
        }

        // Step 2: collapse every group into a single bucket carrying the summed doc count.
        final List<B> merged = new ArrayList<>();
        for (Map.Entry<Long, List<B>> group : bucketsByHash.entrySet()) {
            merged.add(createInternalGeoGridBucket(group.getKey(), totalDocCount(group.getValue()), InternalAggregations.EMPTY));
        }

        // Step 3: biggest buckets first; equal counts fall back to the reversed natural bucket order.
        merged.sort((left, right) -> {
            final int byCount = Long.compare(right.docCount, left.docCount);
            return byCount != 0 ? byCount : right.compareTo(left);
        });

        // Step 4: keep only as many buckets as the first input asked for.
        final int limit = Math.min(inputs.get(0).getRequiredSize(), merged.size());
        final List<B> expectedTop = merged.subList(0, limit);

        // Step 5: compare position by position.
        assertEquals(expectedTop.size(), reduced.getBuckets().size());
        for (int pos = 0; pos < reduced.getBuckets().size(); pos++) {
            final GeoGrid.Bucket want = expectedTop.get(pos);
            final GeoGrid.Bucket got = reduced.getBuckets().get(pos);
            assertEquals(want.getDocCount(), got.getDocCount());
            assertEquals(want.getKey(), got.getKey());
        }
    }

    /**
     * Sums the doc counts of all buckets in {@code group}.
     */
    private long totalDocCount(List<B> group) {
        long total = 0;
        for (B member : group) {
            total += member.docCount;
        }
        return total;
    }

    /** The parsed counterpart used when checking the parsed form of generated instances. */
    @Override
    protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
        return ParsedGeoGrid.class;
    }

    /**
     * Returns a copy of {@code instance} that differs in exactly one randomly chosen property:
     * name, bucket list, required size or metadata.
     */
    @Override
    protected T mutateInstance(T instance) {
        String name = instance.getName();
        int size = instance.getRequiredSize();
        List<InternalGeoGridBucket> buckets = instance.getBuckets();
        List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
        Map<String, Object> metaData = instance.getMetaData();
        switch (between(0, 3)) {
            case 0: {
                // different name
                name = name + randomAlphaOfLength(5);
                break;
            }
            case 1: {
                // one additional bucket with a random key and doc count
                final List<InternalGeoGridBucket> grown = new ArrayList<>(buckets);
                final long extraKey = randomNonNegativeLong();
                final int extraDocs = randomInt(IndexWriter.MAX_DOCS);
                grown.add(createInternalGeoGridBucket(extraKey, extraDocs, InternalAggregations.EMPTY));
                buckets = grown;
                break;
            }
            case 2: {
                // larger required size
                size += between(1, 10);
                break;
            }
            case 3: {
                // one additional metadata entry, added to a copy so the source instance stays untouched
                final Map<String, Object> extended =
                    (metaData == null) ? new HashMap<>(1) : new HashMap<>(instance.getMetaData());
                final String extraKey = randomAlphaOfLength(15);
                final int extraValue = randomInt();
                extended.put(extraKey, extraValue);
                metaData = extended;
                break;
            }
            default:
                throw new AssertionError("Illegal randomisation branch");
        }
        return createInternalGeoGrid(name, size, buckets, pipelineAggregators, metaData);
    }

    /**
     * A grid re-created from its own bucket list via {@code create} must equal the original.
     */
    public void testCreateFromBuckets() {
        InternalGeoGrid original = createTestInstance();
        assertThat(original, equalTo(original.create(original.buckets)));
    }
}
Loading

0 comments on commit f1e71be

Please sign in to comment.