Skip to content

Commit

Permalink
Merge branch '8.x' into backport/8.x/pr-112973
Browse files (browse the repository at this point in the history)
  • Loading branch information
elasticmachine authored Sep 17, 2024
2 parents dc8b074 + d7b90e1 commit 6dc1e05
Show file tree
Hide file tree
Showing 72 changed files with 3,307 additions and 901 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
package org.elasticsearch.benchmark.tdigest;

import org.elasticsearch.tdigest.Sort;
import org.elasticsearch.tdigest.arrays.TDigestDoubleArray;
import org.elasticsearch.tdigest.arrays.TDigestIntArray;
import org.elasticsearch.tdigest.arrays.WrapperTDigestArrays;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand All @@ -35,7 +38,6 @@
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;

Expand All @@ -49,7 +51,7 @@
@State(Scope.Thread)
public class SortBench {
private final int size = 100000;
private final double[] values = new double[size];
private final TDigestDoubleArray values = WrapperTDigestArrays.INSTANCE.newDoubleArray(size);

@Param({ "0", "1", "-1" })
public int sortDirection;
Expand All @@ -58,22 +60,22 @@ public class SortBench {
public void setup() {
Random prng = new Random(999983);
for (int i = 0; i < size; i++) {
values[i] = prng.nextDouble();
values.set(i, prng.nextDouble());
}
if (sortDirection > 0) {
Arrays.sort(values);
values.sort();
} else if (sortDirection < 0) {
Arrays.sort(values);
Sort.reverse(values, 0, values.length);
values.sort();
Sort.reverse(values, 0, values.size());
}
}

@Benchmark
public void quicksort() {
int[] order = new int[size];
public void stableSort() {
TDigestIntArray order = WrapperTDigestArrays.INSTANCE.newIntArray(size);
for (int i = 0; i < size; i++) {
order[i] = i;
order.set(i, i);
}
Sort.sort(order, values, null, values.length);
Sort.stableSort(order, values, values.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@

package org.elasticsearch.benchmark.tdigest;

import org.elasticsearch.tdigest.AVLTreeDigest;
import org.elasticsearch.tdigest.MergingDigest;
import org.elasticsearch.tdigest.TDigest;
import org.elasticsearch.tdigest.arrays.WrapperTDigestArrays;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand Down Expand Up @@ -61,13 +61,19 @@ public enum TDigestFactory {
MERGE {
@Override
TDigest create(double compression) {
return new MergingDigest(compression, (int) (10 * compression));
return new MergingDigest(WrapperTDigestArrays.INSTANCE, compression, (int) (10 * compression));
}
},
AVL_TREE {
@Override
TDigest create(double compression) {
return new AVLTreeDigest(compression);
return TDigest.createAvlTreeDigest(WrapperTDigestArrays.INSTANCE, compression);
}
},
HYBRID {
@Override
TDigest create(double compression) {
return TDigest.createHybridDigest(WrapperTDigestArrays.INSTANCE, compression);
}
};

Expand All @@ -77,7 +83,7 @@ TDigest create(double compression) {
@Param({ "100", "300" })
double compression;

@Param({ "MERGE", "AVL_TREE" })
@Param({ "MERGE", "AVL_TREE", "HYBRID" })
TDigestFactory tdigestFactory;

@Param({ "NORMAL", "GAUSSIAN" })
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/112512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112512
summary: Add Completion Inference API for Alibaba Cloud AI Search Model
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/112677.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112677
summary: Stream OpenAI Completion
area: Machine Learning
type: enhancement
issues: []
1 change: 1 addition & 0 deletions libs/tdigest/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@

// Module descriptor for the t-digest library (libs/tdigest).
module org.elasticsearch.tdigest {
// Main package of the library.
exports org.elasticsearch.tdigest;
// Array abstractions package (TDigestArrays, TDigestDoubleArray, ...); exported so that code
// outside this module can reference those types.
exports org.elasticsearch.tdigest.arrays;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@

package org.elasticsearch.tdigest;

import org.elasticsearch.tdigest.arrays.TDigestArrays;
import org.elasticsearch.tdigest.arrays.TDigestDoubleArray;
import org.elasticsearch.tdigest.arrays.TDigestLongArray;

import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.Iterator;

/**
Expand All @@ -32,20 +35,20 @@ final class AVLGroupTree extends AbstractCollection<Centroid> {
/* For insertions into the tree */
private double centroid;
private long count;
private double[] centroids;
private long[] counts;
private long[] aggregatedCounts;
private final TDigestDoubleArray centroids;
private final TDigestLongArray counts;
private final TDigestLongArray aggregatedCounts;
private final IntAVLTree tree;

AVLGroupTree() {
tree = new IntAVLTree() {
AVLGroupTree(TDigestArrays arrays) {
tree = new IntAVLTree(arrays) {

@Override
protected void resize(int newCapacity) {
super.resize(newCapacity);
centroids = Arrays.copyOf(centroids, newCapacity);
counts = Arrays.copyOf(counts, newCapacity);
aggregatedCounts = Arrays.copyOf(aggregatedCounts, newCapacity);
centroids.resize(newCapacity);
counts.resize(newCapacity);
aggregatedCounts.resize(newCapacity);
}

@Override
Expand All @@ -56,13 +59,13 @@ protected void merge(int node) {

@Override
protected void copy(int node) {
centroids[node] = centroid;
counts[node] = count;
centroids.set(node, centroid);
counts.set(node, count);
}

@Override
protected int compare(int node) {
if (centroid < centroids[node]) {
if (centroid < centroids.get(node)) {
return -1;
} else {
// upon equality, the newly added node is considered greater
Expand All @@ -73,13 +76,13 @@ protected int compare(int node) {
@Override
protected void fixAggregates(int node) {
super.fixAggregates(node);
aggregatedCounts[node] = counts[node] + aggregatedCounts[left(node)] + aggregatedCounts[right(node)];
aggregatedCounts.set(node, counts.get(node) + aggregatedCounts.get(left(node)) + aggregatedCounts.get(right(node)));
}

};
centroids = new double[tree.capacity()];
counts = new long[tree.capacity()];
aggregatedCounts = new long[tree.capacity()];
centroids = arrays.newDoubleArray(tree.capacity());
counts = arrays.newLongArray(tree.capacity());
aggregatedCounts = arrays.newLongArray(tree.capacity());
}

/**
Expand Down Expand Up @@ -107,14 +110,14 @@ public int next(int node) {
* Return the mean for the provided node.
*/
public double mean(int node) {
return centroids[node];
return centroids.get(node);
}

/**
* Return the count for the provided node.
*/
public long count(int node) {
return counts[node];
return counts.get(node);
}

/**
Expand Down Expand Up @@ -167,7 +170,7 @@ public int floorSum(long sum) {
int floor = IntAVLTree.NIL;
for (int node = tree.root(); node != IntAVLTree.NIL;) {
final int left = tree.left(node);
final long leftCount = aggregatedCounts[left];
final long leftCount = aggregatedCounts.get(left);
if (leftCount <= sum) {
floor = node;
sum -= leftCount + count(node);
Expand Down Expand Up @@ -199,11 +202,11 @@ public int last() {
*/
public long headSum(int node) {
final int left = tree.left(node);
long sum = aggregatedCounts[left];
long sum = aggregatedCounts.get(left);
for (int n = node, p = tree.parent(node); p != IntAVLTree.NIL; n = p, p = tree.parent(n)) {
if (n == tree.right(p)) {
final int leftP = tree.left(p);
sum += counts[p] + aggregatedCounts[leftP];
sum += counts.get(p) + aggregatedCounts.get(leftP);
}
}
return sum;
Expand Down Expand Up @@ -243,7 +246,7 @@ public void remove() {
* Return the total count of points that have been added to the tree.
*/
public long sum() {
return aggregatedCounts[tree.root()];
return aggregatedCounts.get(tree.root());
}

void checkBalance() {
Expand All @@ -255,7 +258,9 @@ void checkAggregates() {
}

private void checkAggregates(int node) {
assert aggregatedCounts[node] == counts[node] + aggregatedCounts[tree.left(node)] + aggregatedCounts[tree.right(node)];
assert aggregatedCounts.get(node) == counts.get(node) + aggregatedCounts.get(tree.left(node)) + aggregatedCounts.get(
tree.right(node)
);
if (node != IntAVLTree.NIL) {
checkAggregates(tree.left(node));
checkAggregates(tree.right(node));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

package org.elasticsearch.tdigest;

import org.elasticsearch.tdigest.arrays.TDigestArrays;

import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
Expand All @@ -29,6 +31,8 @@
import static org.elasticsearch.tdigest.IntAVLTree.NIL;

public class AVLTreeDigest extends AbstractTDigest {
private final TDigestArrays arrays;

final Random gen = new Random();
private final double compression;
private AVLGroupTree summary;
Expand All @@ -46,9 +50,10 @@ public class AVLTreeDigest extends AbstractTDigest {
* quantiles. Conversely, you should expect to track about 5 N centroids for this
* accuracy.
*/
public AVLTreeDigest(double compression) {
AVLTreeDigest(TDigestArrays arrays, double compression) {
this.arrays = arrays;
this.compression = compression;
summary = new AVLGroupTree();
summary = new AVLGroupTree(arrays);
}

/**
Expand Down Expand Up @@ -149,7 +154,7 @@ public void compress() {
needsCompression = false;

AVLGroupTree centroids = summary;
this.summary = new AVLGroupTree();
this.summary = new AVLGroupTree(arrays);

final int[] nodes = new int[centroids.size()];
nodes[0] = centroids.first();
Expand Down
10 changes: 10 additions & 0 deletions libs/tdigest/src/main/java/org/elasticsearch/tdigest/Dist.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

package org.elasticsearch.tdigest;

import org.elasticsearch.tdigest.arrays.TDigestDoubleArray;

import java.util.List;
import java.util.function.Function;

Expand Down Expand Up @@ -102,6 +104,10 @@ public static double cdf(final double x, List<Double> data) {
return cdf(x, data.size(), data::get);
}

/**
 * Overload of {@code cdf} for values stored in a {@link TDigestDoubleArray}.
 * <p>
 * Delegates to the private index-based {@code cdf} helper, handing it the array's size and an
 * accessor that reads one element by index.
 * NOTE(review): any ordering requirement on {@code data} is defined by that helper - confirm there.
 *
 * @param x    the point at which to evaluate
 * @param data the values to evaluate against
 * @return whatever the index-based helper computes for {@code x}
 */
public static double cdf(final double x, TDigestDoubleArray data) {
    final int length = data.size();
    return cdf(x, length, index -> data.get(index));
}

private static double quantile(final double q, final int length, Function<Integer, Double> elementGetter) {
if (length == 0) {
return Double.NaN;
Expand Down Expand Up @@ -133,4 +139,8 @@ public static double quantile(final double q, double[] data) {
/**
 * Overload of {@code quantile} for values stored in a {@link List}.
 * <p>
 * Delegates to the private index-based {@code quantile} helper, handing it the list's size and
 * an accessor that reads one element by index. That helper returns {@code NaN} when the length is 0.
 *
 * @param q    the requested quantile
 * @param data the values to read from
 * @return the helper's result for {@code q}; {@code NaN} for an empty list
 */
public static double quantile(final double q, List<Double> data) {
    final int length = data.size();
    return quantile(q, length, index -> data.get(index));
}

/**
 * Overload of {@code quantile} for values stored in a {@link TDigestDoubleArray}.
 * <p>
 * Delegates to the private index-based {@code quantile} helper, handing it the array's size and
 * an accessor that reads one element by index. That helper returns {@code NaN} when the length is 0.
 *
 * @param q    the requested quantile
 * @param data the values to read from
 * @return the helper's result for {@code q}; {@code NaN} for an empty array
 */
public static double quantile(final double q, TDigestDoubleArray data) {
    final int length = data.size();
    return quantile(q, length, index -> data.get(index));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.tdigest;

import org.elasticsearch.tdigest.arrays.TDigestArrays;

import java.util.Collection;

/**
Expand All @@ -32,14 +34,16 @@
*/
public class HybridDigest extends AbstractTDigest {

private final TDigestArrays arrays;

// See MergingDigest's compression param.
private final double compression;

// Indicates the sample size over which it switches from SortingDigest to MergingDigest.
private final long maxSortingSize;

// This is set to null when the implementation switches to MergingDigest.
private SortingDigest sortingDigest = new SortingDigest();
private SortingDigest sortingDigest;

// This gets initialized when the implementation switches to MergingDigest.
private MergingDigest mergingDigest;
Expand All @@ -51,9 +55,11 @@ public class HybridDigest extends AbstractTDigest {
* @param compression The compression factor for the MergingDigest
* @param maxSortingSize The sample size limit for switching from a {@link SortingDigest} to a {@link MergingDigest} implementation
*/
HybridDigest(double compression, long maxSortingSize) {
HybridDigest(TDigestArrays arrays, double compression, long maxSortingSize) {
this.arrays = arrays;
this.compression = compression;
this.maxSortingSize = maxSortingSize;
this.sortingDigest = new SortingDigest(arrays);
}

/**
Expand All @@ -62,11 +68,11 @@ public class HybridDigest extends AbstractTDigest {
*
* @param compression The compression factor for the MergingDigest
*/
HybridDigest(double compression) {
HybridDigest(TDigestArrays arrays, double compression) {
// The default maxSortingSize is calculated so that the SortingDigest will have comparable size with the MergingDigest
// at the point where implementations switch, e.g. for default compression 100 SortingDigest allocates ~16kB and MergingDigest
// allocates ~15kB.
this(compression, Math.round(compression) * 20);
this(arrays, compression, Math.round(compression) * 20);
}

@Override
Expand Down Expand Up @@ -98,9 +104,9 @@ public void reserve(long size) {
// Check if we need to switch implementations.
assert sortingDigest != null;
if (sortingDigest.size() + size >= maxSortingSize) {
mergingDigest = new MergingDigest(compression);
for (double value : sortingDigest.values) {
mergingDigest.add(value);
mergingDigest = new MergingDigest(arrays, compression);
for (int i = 0; i < sortingDigest.values.size(); i++) {
mergingDigest.add(sortingDigest.values.get(i));
}
mergingDigest.reserve(size);
// Release the allocated SortingDigest.
Expand Down
Loading

0 comments on commit 6dc1e05

Please sign in to comment.